diff --git a/.dockerignore b/.dockerignore index 5d6d171fdecbf..3791cca95e3fe 100644 --- a/.dockerignore +++ b/.dockerignore @@ -61,3 +61,5 @@ !rust/datafusion/Cargo.toml !rust/datafusion/benches !rust/integration-testing/Cargo.toml +!go/go.mod +!go/go.sum \ No newline at end of file diff --git a/.env b/.env index 2f06cca474b00..4aa04daab040e 100644 --- a/.env +++ b/.env @@ -47,17 +47,19 @@ ULIMIT_CORE=-1 # Default versions for platforms ALMALINUX=8 +ALPINE_LINUX=3.16 DEBIAN=11 FEDORA=35 UBUNTU=20.04 # Default versions for various dependencies CLANG_TOOLS=12 -CUDA=9.1 +CUDA=11.0.3 DASK=latest DOTNET=6.0 GCC_VERSION="" GO=1.16 +STATICCHECK=v0.2.2 HDFS=3.2.1 JDK=8 KARTOTHEK=latest @@ -65,6 +67,7 @@ KARTOTHEK=latest LLVM=13 MAVEN=3.5.4 NODE=16 +NUMBA=latest NUMPY=latest PANDAS=latest PYTHON=3.8 diff --git a/.gitattributes b/.gitattributes index f897a36134daa..1a5b156b491fd 100644 --- a/.gitattributes +++ b/.gitattributes @@ -5,3 +5,5 @@ r/src/arrowExports.cpp linguist-generated=true r/man/*.Rd linguist-generated=true cpp/src/generated/*.h linguist-generated=true r/NEWS.md merge=union +go/**/*.s linguist-generated=true +go/arrow/unionmode_string.go linguist-generated=true diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index ba95fcd509cbd..d4c5c4c89799f 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -119,7 +119,7 @@ jobs: docker-compose run --rm minimal macos: - name: AMD64 MacOS 10.15 C++ + name: AMD64 macOS 11 C++ runs-on: macos-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 60 @@ -193,7 +193,7 @@ jobs: name: AMD64 ${{ matrix.name }} C++17 runs-on: ${{ matrix.os }} if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - timeout-minutes: 45 + timeout-minutes: 60 strategy: fail-fast: false matrix: @@ -202,14 +202,12 @@ jobs: include: - os: windows-2019 name: Windows 2019 - generator: Visual Studio 16 2019 env: ARROW_BOOST_USE_SHARED: OFF ARROW_BUILD_BENCHMARKS: ON ARROW_BUILD_SHARED: ON ARROW_BUILD_STATIC: OFF ARROW_BUILD_TESTS: ON - ARROW_CXXFLAGS: "/std:c++17" ARROW_DATASET: ON ARROW_FLIGHT: OFF ARROW_HDFS: ON @@ -227,11 +225,13 @@ jobs: ARROW_WITH_ZLIB: ON ARROW_WITH_ZSTD: ON BOOST_SOURCE: BUNDLED - CMAKE_ARGS: '-A x64 -DOPENSSL_ROOT_DIR=C:\Program Files\OpenSSL-Win64' - CMAKE_GENERATOR: ${{ matrix.generator }} + CMAKE_CXX_STANDARD: "17" + CMAKE_GENERATOR: Ninja CMAKE_INSTALL_LIBDIR: bin CMAKE_INSTALL_PREFIX: /usr CMAKE_UNITY_BUILD: ON + OPENSSL_ROOT_DIR: >- + C:\Program Files\OpenSSL-Win64 NPROC: 3 steps: - name: Disable Crash Dialogs @@ -254,9 +254,30 @@ jobs: - name: Download Timezone Database shell: bash run: ci/scripts/download_tz_database.sh - - name: Build + - name: Install ccache shell: bash - run: ci/scripts/cpp_build.sh $(pwd) $(pwd)/build + run: | + ci/scripts/install_ccache.sh 4.6.2 /usr + - name: Setup ccache + shell: bash + run: | + ci/scripts/ccache_setup.sh + - name: ccache info + id: ccache-info + shell: bash + run: | + echo "::set-output name=cache-dir::$(ccache --get-config cache_dir)" + - name: Cache ccache + uses: actions/cache@v2 + with: + path: ${{ steps.ccache-info.outputs.cache-dir }} + key: cpp-ccache-windows-${{ hashFiles('cpp/**') }} + restore-keys: cpp-ccache-windows- + - name: Build + shell: cmd + run: | + call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 + bash -c "ci/scripts/cpp_build.sh $(pwd) $(pwd)/build" - name: Test shell: bash run: ci/scripts/cpp_test.sh $(pwd) $(pwd)/build diff --git 
a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml index 385c081cc6c37..8f34db9708316 100644 --- a/.github/workflows/csharp.yml +++ b/.github/workflows/csharp.yml @@ -91,7 +91,7 @@ jobs: run: ci/scripts/csharp_test.sh $(pwd) macos: - name: AMD64 MacOS 10.15 C# ${{ matrix.dotnet }} + name: AMD64 macOS 11 C# ${{ matrix.dotnet }} runs-on: macos-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 15 diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index cbbe067007d75..4112bf3bd4c75 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -50,9 +50,15 @@ jobs: strategy: fail-fast: false matrix: - go: [1.16] + go: [1.16, 1.18] + include: + - go: 1.16 + staticcheck: v0.2.2 + - go: 1.18 + staticcheck: latest env: GO: ${{ matrix.go }} + STATICCHECK: ${{ matrix.staticcheck }} steps: - name: Checkout Arrow uses: actions/checkout@v3 @@ -80,9 +86,15 @@ jobs: strategy: fail-fast: false matrix: - go: [1.16] + go: [1.16, 1.18] + include: + - go: 1.16 + staticcheck: v0.2.2 + - go: 1.18 + staticcheck: latest env: GO: ${{ matrix.go }} + STATICCHECK: ${{ matrix.staticcheck }} steps: - name: Checkout Arrow uses: actions/checkout@v3 @@ -111,9 +123,15 @@ jobs: strategy: fail-fast: false matrix: - go: [1.16] + go: [1.16, 1.18] + include: + - go: 1.16 + staticcheck: v0.2.2 + - go: 1.18 + staticcheck: latest env: GO: ${{ matrix.go }} + STATICCHECK: ${{ matrix.staticcheck }} steps: - name: Checkout Arrow uses: actions/checkout@v3 @@ -140,19 +158,26 @@ jobs: strategy: fail-fast: false matrix: - go: [1.16] - steps: - - name: Install go - uses: actions/setup-go@v1 - with: - go-version: ${{ matrix.go }} + go: [1.16, 1.18] + include: + - go: 1.16 + staticcheck: v0.2.2 + - go: 1.18 + staticcheck: latest + steps: - name: Checkout Arrow uses: actions/checkout@v3 with: fetch-depth: 0 submodules: recursive + - name: Install go + uses: actions/setup-go@v3 + with: + go-version: ${{ matrix.go }} + cache: true + cache-dependency-path: go/go.sum - name: Install staticcheck - run: go install honnef.co/go/tools/cmd/staticcheck@v0.2.2 + run: go install honnef.co/go/tools/cmd/staticcheck@${{ matrix.staticcheck }} - name: Build shell: bash run: ci/scripts/go_build.sh $(pwd) @@ -161,26 +186,33 @@ jobs: run: ci/scripts/go_test.sh $(pwd) macos: - name: AMD64 MacOS 10.15 Go ${{ matrix.go }} + name: AMD64 macOS 11 Go ${{ matrix.go }} runs-on: macos-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 15 strategy: fail-fast: false matrix: - go: [1.16] - steps: - - name: Install go - uses: actions/setup-go@v1 - with: - go-version: ${{ matrix.go }} + go: [1.16, 1.18] + include: + - go: 1.16 + staticcheck: v0.2.2 + - go: 1.18 + staticcheck: latest + steps: - name: Checkout Arrow uses: actions/checkout@v3 with: fetch-depth: 0 submodules: recursive + - name: Install go + uses: actions/setup-go@v3 + with: + go-version: ${{ matrix.go }} + cache: true + cache-dependency-path: go/go.sum - name: Install staticcheck - run: go install honnef.co/go/tools/cmd/staticcheck@v0.2.2 + run: go install honnef.co/go/tools/cmd/staticcheck@${{ matrix.staticcheck }} - name: Build shell: bash run: ci/scripts/go_build.sh $(pwd) @@ -189,31 +221,38 @@ jobs: run: ci/scripts/go_test.sh $(pwd) macos-cgo: - name: AMD64 MacOS 10.15 Go ${{ matrix.go }} - CGO + name: AMD64 macOS 11 Go ${{ matrix.go }} - CGO runs-on: macos-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 60 strategy: fail-fast: false matrix: - go: [1.16] + go: [1.16, 
1.18] + include: + - go: 1.16 + staticcheck: v0.2.2 + - go: 1.18 + staticcheck: latest env: ARROW_GO_TESTCGO: "1" - steps: - - name: Install go - uses: actions/setup-go@v1 - with: - go-version: ${{ matrix.go }} + steps: - name: Checkout Arrow uses: actions/checkout@v3 with: fetch-depth: 0 submodules: recursive + - name: Install go + uses: actions/setup-go@v3 + with: + go-version: ${{ matrix.go }} + cache: true + cache-dependency-path: go/go.sum - name: Brew Install Arrow shell: bash run: brew install apache-arrow - name: Install staticcheck - run: go install honnef.co/go/tools/cmd/staticcheck@v0.2.2 + run: go install honnef.co/go/tools/cmd/staticcheck@${{ matrix.staticcheck }} - name: Build shell: bash run: ci/scripts/go_build.sh $(pwd) @@ -264,11 +303,13 @@ jobs: echo "CGO_LDFLAGS=-g -O2 -L$(cygpath --windows ${MINGW_PREFIX}/lib) -L$(cygpath --windows ${MINGW_PREFIX}/bin)" >> $GITHUB_ENV echo "MINGW_PREFIX=$(cygpath --windows ${MINGW_PREFIX})" >> $GITHUB_ENV - name: Install go - uses: actions/setup-go@v2 + uses: actions/setup-go@v3 with: - go-version: '1.17' + go-version: '1.18' + cache: true + cache-dependency-path: go/go.sum - name: Install staticcheck - run: go install honnef.co/go/tools/cmd/staticcheck@v0.2.2 + run: go install honnef.co/go/tools/cmd/staticcheck@latest - name: Build shell: bash run: ci/scripts/go_build.sh $(pwd) diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml index 1cba0104899b2..e5ef6d13fe394 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java.yml @@ -101,7 +101,7 @@ jobs: run: archery docker push ${{ matrix.image }} macos: - name: AMD64 MacOS 10.15 Java JDK ${{ matrix.jdk }} + name: AMD64 macOS 11 Java JDK ${{ matrix.jdk }} runs-on: macos-latest if: github.event_name == 'push' timeout-minutes: 30 @@ -125,3 +125,30 @@ jobs: - name: Test shell: bash run: ci/scripts/java_test.sh $(pwd) $(pwd)/build + + windows: + name: AMD64 Windows Server 2022 Java JDK ${{ matrix.jdk }} + runs-on: windows-latest + if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + jdk: [11] + steps: + - name: Set up Java + uses: actions/setup-java@v3 + with: + java-version: ${{ matrix.jdk }} + distribution: 'temurin' + - name: Checkout Arrow + uses: actions/checkout@v3 + with: + fetch-depth: 0 + submodules: recursive + - name: Build + shell: bash + run: ci/scripts/java_build.sh $(pwd) $(pwd)/build + - name: Test + shell: bash + run: ci/scripts/java_test.sh $(pwd) $(pwd)/build diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml index 07cc3b1265212..64afc2de3e581 100644 --- a/.github/workflows/java_jni.yml +++ b/.github/workflows/java_jni.yml @@ -47,7 +47,7 @@ env: jobs: docker: - name: AMD64 Debian 9 Java JNI (Gandiva, Plasma, ORC, Dataset) + name: AMD64 manylinux2014 Java JNI runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 90 @@ -61,8 +61,8 @@ jobs: uses: actions/cache@v2 with: path: .docker - key: maven-${{ hashFiles('java/**') }} - restore-keys: maven- + key: java-jni-manylinux-2014-${{ hashFiles('cpp/**', 'java/**') }} + restore-keys: java-jni-manylinux-2014- - name: Setup Python uses: actions/setup-python@v4 with: @@ -70,14 +70,14 @@ jobs: - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build - run: archery docker run debian-java-jni + run: archery docker run java-jni-manylinux-2014 - name: Docker Push if: success() && github.event_name == 'push' && github.repository == 
'apache/arrow' continue-on-error: true - run: archery docker push debian-java-jni + run: archery docker push java-jni-manylinux-2014 docker_integration_python: - name: AMD64 Debian 9 Java C Data Interface Integration + name: AMD64 Conda Java C Data Interface Integration runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 90 diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml index 18d54c5b4ef43..0d7263e8a73e1 100644 --- a/.github/workflows/js.yml +++ b/.github/workflows/js.yml @@ -68,7 +68,7 @@ jobs: run: archery docker push debian-js macos: - name: AMD64 MacOS 10.15 NodeJS ${{ matrix.node }} + name: AMD64 macOS 11 NodeJS ${{ matrix.node }} runs-on: macos-latest if: github.event_name == 'push' timeout-minutes: 60 diff --git a/.github/workflows/matlab.yml b/.github/workflows/matlab.yml index 3780ba113ab40..477582edb8889 100644 --- a/.github/workflows/matlab.yml +++ b/.github/workflows/matlab.yml @@ -69,11 +69,11 @@ jobs: with: select-by-folder: matlab/test macos: - name: AMD64 MacOS 10.15 MATLAB + name: AMD64 macOS 11 MATLAB runs-on: macos-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} steps: - - name: Check out repository + - name: Check out repository uses: actions/checkout@v3 with: fetch-depth: 0 diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index fe834a55e6ef9..5ccbceabea74a 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -113,7 +113,7 @@ jobs: run: archery docker push ${{ matrix.image }} macos: - name: AMD64 MacOS 10.15 Python 3 + name: AMD64 macOS 11 Python 3 runs-on: macos-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 60 diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 4a9c605e3bce0..4f706e3e5b117 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -327,6 +327,14 @@ jobs: shell: Rscript {0} working-directory: r run: | + Sys.setenv( + RWINLIB_LOCAL = file.path(Sys.getenv("GITHUB_WORKSPACE"), "r", "windows", "libarrow.zip"), + MAKEFLAGS = paste0("-j", parallel::detectCores()), + ARROW_R_DEV = TRUE, + "_R_CHECK_FORCE_SUGGESTS_" = FALSE + ) + # we use pak for package installation since it is faster, safer and more convenient + pak::local_install() pak::pak("lintr") lintr::expect_lint_free() - name: Dump install logs diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index bf49376c6fd9a..3f877b4aa3035 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -102,7 +102,7 @@ jobs: run: archery docker push ubuntu-ruby macos: - name: AMD64 MacOS 10.15 GLib & Ruby + name: AMD64 macOS 11 GLib & Ruby runs-on: macos-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 60 diff --git a/.gitignore b/.gitignore index 1406c30689f8c..103889cb9bc01 100644 --- a/.gitignore +++ b/.gitignore @@ -87,3 +87,6 @@ cpp/Brewfile.lock.json java-dist/ java-native-c/ java-native-cpp/ + +# archery files +dev/archery/build \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7fd35b1aa8802..7311b5a9a3f94 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -37,7 +37,7 @@ repos: entry: --entrypoint /bin/hadolint hadolint/hadolint:latest - exclude: ^dev/.*$ - repo: https://github.com/pycqa/flake8 - rev: 4.0.1 + rev: 5.0.3 hooks: - id: flake8 name: Python Format diff --git a/LICENSE.txt b/LICENSE.txt index 843cf4f6a5e90..a82c22aeceaaa 100644 --- a/LICENSE.txt +++ b/LICENSE.txt 
@@ -1990,12 +1990,14 @@ for PyArrow. Ibis is released under the Apache License, Version 2.0. This project includes code from the autobrew project. -* r/tools/autobrew and dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb - are based on code from the autobrew project. +The following files are based on code from the autobrew project: +* r/tools/autobrew +* dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb +* dev/tasks/homebrew-formulae/autobrew/apache-arrow-static.rb Copyright (c) 2019, Jeroen Ooms License: MIT -Homepage: https://github.com/jeroen/autobrew +Homepage: https://github.com/autobrew/ -------------------------------------------------------------------------------- diff --git a/appveyor.yml b/appveyor.yml index 03a3597c9b7ba..b6ba6e6e1a487 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -16,7 +16,7 @@ # under the License. # Operating system (build VM template) -os: Visual Studio 2017 +os: Visual Studio 2019 only_commits: # Skip commits not related to Python or C++ @@ -36,35 +36,24 @@ matrix: environment: global: - # Make these variables visible in all jobs and build steps - MSVC_DEFAULT_OPTIONS: ON APPVEYOR_SAVE_CACHE_ON_ERROR: true + MSVC_DEFAULT_OPTIONS: ON + # Change the clcache dir to reset caches everywhere when a setting # is changed incompatibly (e.g. CLCACHE_COMPRESS). CLCACHE_DIR: C:\Users\Appveyor\clcache1 CLCACHE_SERVER: 1 CLCACHE_COMPRESS: 1 CLCACHE_COMPRESSLEVEL: 6 - ARROW_BUILD_FLIGHT: "OFF" - ARROW_BUILD_FLIGHT_SQL: "OFF" - ARROW_BUILD_GANDIVA: "OFF" - ARROW_LLVM_VERSION: "7.0.*" - ARROW_S3: "OFF" - PYTHON: "3.8" - ARCH: "64" - matrix: - # NOTE: clcache seems to work best with Ninja and worst with msbuild - # (as generated by cmake) - - JOB: "Toolchain" - GENERATOR: Ninja - ARROW_GCS: "ON" - ARROW_S3: "ON" - ARROW_BUILD_FLIGHT: "ON" - ARROW_BUILD_FLIGHT_SQL: "ON" - ARROW_BUILD_GANDIVA: "ON" - - JOB: "Build_Debug" - GENERATOR: Ninja + ARCH: "64" + ARROW_BUILD_FLIGHT: "ON" + ARROW_BUILD_FLIGHT_SQL: "ON" + ARROW_BUILD_GANDIVA: "ON" + ARROW_GCS: "ON" + ARROW_S3: "ON" + GENERATOR: Ninja + PYTHON: "3.8" before_build: - call ci\appveyor-cpp-setup.bat diff --git a/c_glib/meson.build b/c_glib/meson.build index 14f3a6225b4d7..85d3a75d42381 100644 --- a/c_glib/meson.build +++ b/c_glib/meson.build @@ -24,7 +24,7 @@ project('arrow-glib', 'c', 'cpp', 'cpp_std=c++11', ]) -version = '9.0.0-SNAPSHOT' +version = '10.0.0-SNAPSHOT' if version.endswith('-SNAPSHOT') version_numbers = version.split('-')[0].split('.') version_tag = version.split('-')[1] diff --git a/c_glib/test/test-list-scalar.rb b/c_glib/test/test-list-scalar.rb index 0ddbf60bc05e3..06633ee3bb48a 100644 --- a/c_glib/test/test-list-scalar.rb +++ b/c_glib/test/test-list-scalar.rb @@ -41,17 +41,8 @@ def test_equal end def test_to_s - assert_equal(<<-LIST.strip, @scalar.to_s) -[ - [ - [ - 1, - 2, - 3 - ] - ] -] - LIST + assert_equal("list>[list[1, 2, 3]]", + @scalar.to_s) end def test_value diff --git a/c_glib/test/test-map-scalar.rb b/c_glib/test/test-map-scalar.rb index 1e004569ef38e..541a7b32ea48b 100644 --- a/c_glib/test/test-map-scalar.rb +++ b/c_glib/test/test-map-scalar.rb @@ -57,18 +57,7 @@ def test_equal def test_to_s assert_equal(<<-MAP.strip, @scalar.to_s) -[ - keys: - [ - "hello", - "world" - ] - values: - [ - 1, - 2 - ] -] +map[{key:string = hello, value:int8 = 1}, {key:string = world, value:int8 = 2}] MAP end diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat index e2c2d800d0ffe..4b90e49257c5e 100644 --- a/ci/appveyor-cpp-build.bat +++ b/ci/appveyor-cpp-build.bat @@ -31,51 +31,13 @@ set 
ARROW_DEBUG_MEMORY_POOL=trap set CMAKE_BUILD_PARALLEL_LEVEL=%NUMBER_OF_PROCESSORS% set CTEST_PARALLEL_LEVEL=%NUMBER_OF_PROCESSORS% -@rem -@rem In the configurations below we disable building the Arrow static library -@rem to save some time. Unfortunately this will still build the Parquet static -@rem library because of PARQUET-1420 (Thrift-generated symbols not exported in DLL). -@rem -if "%JOB%" == "Build_Debug" ( - mkdir cpp\build-debug - pushd cpp\build-debug - - cmake -G "%GENERATOR%" ^ - -DARROW_BOOST_USE_SHARED=OFF ^ - -DARROW_BUILD_EXAMPLES=ON ^ - -DARROW_BUILD_STATIC=OFF ^ - -DARROW_BUILD_TESTS=ON ^ - -DARROW_CXXFLAGS="/MP" ^ - -DARROW_ENABLE_TIMING_TESTS=OFF ^ - -DARROW_USE_PRECOMPILED_HEADERS=OFF ^ - -DARROW_VERBOSE_THIRDPARTY_BUILD=OFF ^ - -DCMAKE_BUILD_TYPE="Debug" ^ - -DCMAKE_UNITY_BUILD=ON ^ - .. || exit /B - - cmake --build . --config Debug || exit /B - ctest --output-on-failure || exit /B - popd - - @rem Finish Debug build successfully - exit /B 0 -) call activate arrow -@rem Use Boost from Anaconda -set BOOST_ROOT=%CONDA_PREFIX%\Library -set BOOST_LIBRARYDIR=%CONDA_PREFIX%\Library\lib - @rem The "main" C++ build script for Windows CI @rem (i.e. for usual configurations) -if "%JOB%" == "Toolchain" ( - set CMAKE_ARGS=-DARROW_DEPENDENCY_SOURCE=CONDA -DARROW_WITH_BZ2=ON -) else ( - @rem We're in a conda environment but don't want to use it for the dependencies - set CMAKE_ARGS=-DARROW_DEPENDENCY_SOURCE=AUTO -) +set CMAKE_ARGS=-DARROW_DEPENDENCY_SOURCE=CONDA -DARROW_WITH_BZ2=ON @rem Enable warnings-as-errors set ARROW_CXXFLAGS=/WX /MP @@ -119,6 +81,7 @@ cmake -G "%GENERATOR%" %CMAKE_ARGS% ^ -DCMAKE_BUILD_TYPE="Release" ^ -DCMAKE_CXX_COMPILER=clcache ^ -DCMAKE_CXX_FLAGS_RELEASE="/MD /Od /UNDEBUG" ^ + -DCMAKE_CXX_STANDARD=17 ^ -DCMAKE_INSTALL_PREFIX=%CONDA_PREFIX%\Library ^ -DCMAKE_UNITY_BUILD=ON ^ -DCMAKE_VERBOSE_MAKEFILE=OFF ^ diff --git a/ci/appveyor-cpp-setup.bat b/ci/appveyor-cpp-setup.bat index 1fa126cb0d243..f9390e9be5ae5 100644 --- a/ci/appveyor-cpp-setup.bat +++ b/ci/appveyor-cpp-setup.bat @@ -17,9 +17,7 @@ @echo on -set "PATH=C:\Miniconda37-x64;C:\Miniconda37-x64\Scripts;C:\Miniconda37-x64\Library\bin;%PATH%" -set BOOST_ROOT=C:\Libraries\boost_1_67_0 -set BOOST_LIBRARYDIR=C:\Libraries\boost_1_67_0\lib64-msvc-14.0 +set "PATH=C:\Miniconda38-x64;C:\Miniconda38-x64\Scripts;C:\Miniconda38-x64\Library\bin;%PATH%" @rem @rem Avoid picking up AppVeyor-installed OpenSSL (linker errors with gRPC) @@ -31,6 +29,8 @@ rd /s /q C:\OpenSSL-v11-Win32 rd /s /q C:\OpenSSL-v11-Win64 rd /s /q C:\OpenSSL-v111-Win32 rd /s /q C:\OpenSSL-v111-Win64 +rd /s /q C:\OpenSSL-v30-Win32 +rd /s /q C:\OpenSSL-v30-Win64 @rem @rem Configure miniconda @@ -52,9 +52,8 @@ conda install -q -y -c conda-forge mamba python=3.9 || exit /B mamba update -q -y -c conda-forge --all || exit /B @rem -@rem Create conda environment for Build and Toolchain jobs +@rem Create conda environment @rem -@rem Avoid Boost 1.70 because of https://github.com/boostorg/process/issues/85 set CONDA_PACKAGES= @@ -62,37 +61,26 @@ if "%ARROW_BUILD_GANDIVA%" == "ON" ( @rem Install llvmdev in the toolchain if building gandiva.dll set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_gandiva_win.txt ) -if "%JOB%" == "Toolchain" ( - @rem Install pre-built "toolchain" packages for faster builds - set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_cpp.txt -) -if "%JOB%" NEQ "Build_Debug" ( - @rem Arrow conda environment is only required for the Build and Toolchain jobs - mamba create -n arrow -q -y -c conda-forge ^ - 
--file=ci\conda_env_python.txt ^ - %CONDA_PACKAGES% ^ - "cmake" ^ - "ninja" ^ - "nomkl" ^ - "pandas" ^ - "fsspec" ^ - "python=%PYTHON%" ^ - || exit /B -) +@rem Install pre-built "toolchain" packages for faster builds +set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_cpp.txt +@rem Arrow conda environment +mamba create -n arrow -q -y -c conda-forge ^ + --file=ci\conda_env_python.txt ^ + %CONDA_PACKAGES% ^ + "cmake" ^ + "ninja" ^ + "nomkl" ^ + "pandas" ^ + "fsspec" ^ + "python=%PYTHON%" ^ + || exit /B @rem @rem Configure compiler @rem -if "%GENERATOR%"=="Ninja" set need_vcvarsall=1 -if defined need_vcvarsall ( - if "%APPVEYOR_BUILD_WORKER_IMAGE%" NEQ "Visual Studio 2017" ( - @rem ARROW-14070 Visual Studio 2015 no longer supported - exit /B - ) - call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 - set CC=cl.exe - set CXX=cl.exe -) +call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 +set CC=cl.exe +set CXX=cl.exe @rem @rem Use clcache for faster builds @@ -109,7 +97,7 @@ powershell.exe -Command "Start-Process clcache-server" || exit /B @rem Download Minio somewhere on PATH, for unit tests @rem if "%ARROW_S3%" == "ON" ( - appveyor DownloadFile https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z -FileName C:\Windows\Minio.exe || exit /B + appveyor DownloadFile https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z -FileName C:\Windows\Minio.exe || exit /B ) diff --git a/ci/conan/all/conandata.yml b/ci/conan/all/conandata.yml index 1e32a6f44688a..942a3eba7a6c6 100644 --- a/ci/conan/all/conandata.yml +++ b/ci/conan/all/conandata.yml @@ -21,6 +21,12 @@ # SOFTWARE. 
sources: + "8.0.1": + url: "https://github.com/apache/arrow/archive/apache-arrow-8.0.1.tar.gz" + sha256: "e4c86329be769f2c8778aacc8d6220a9a13c90d59d4988f9349d51299dacbd11" + "8.0.0": + url: "https://github.com/apache/arrow/archive/apache-arrow-8.0.0.tar.gz" + sha256: "19ece12de48e51ce4287d2dee00dc358fbc5ff02f41629d16076f77b8579e272" "7.0.0": url: "https://github.com/apache/arrow/archive/apache-arrow-7.0.0.tar.gz" sha256: "57e13c62f27b710e1de54fd30faed612aefa22aa41fa2c0c3bacd204dd18a8f3" @@ -31,6 +37,28 @@ sources: url: "https://github.com/apache/arrow/archive/apache-arrow-1.0.0.tar.gz" sha256: "08fbd4c633c08939850d619ca0224c75d7a0526467c721c0838b8aa7efccb270" patches: + "8.0.1": + - base_path: "source_subfolder" + patch_file: "patches/8.0.0-0001-cmake.patch" + - base_path: "source_subfolder" + patch_file: "patches/8.0.0-0002-jemalloc.patch" + - base_path: "source_subfolder" + patch_file: "patches/8.0.0-0003-mallctl-takes-size_t.patch" + - base_path: "source_subfolder" + patch_file: "patches/8.0.0-0004-use-find-package.patch" + - base_path: "source_subfolder" + patch_file: "patches/8.0.0-0005-install-utils.patch" + "8.0.0": + - base_path: "source_subfolder" + patch_file: "patches/8.0.0-0001-cmake.patch" + - base_path: "source_subfolder" + patch_file: "patches/8.0.0-0002-jemalloc.patch" + - base_path: "source_subfolder" + patch_file: "patches/8.0.0-0003-mallctl-takes-size_t.patch" + - base_path: "source_subfolder" + patch_file: "patches/8.0.0-0004-use-find-package.patch" + - base_path: "source_subfolder" + patch_file: "patches/8.0.0-0005-install-utils.patch" "7.0.0": - base_path: "source_subfolder" patch_file: "patches/7.0.0-0001-cmake.patch" @@ -38,10 +66,10 @@ patches: patch_file: "patches/7.0.0-0002-jemalloc.patch" - base_path: "source_subfolder" patch_file: "patches/7.0.0-0003-mallctl-takes-size_t.patch" - - base_path: "source_subfolder" - patch_file: "patches/7.0.0-0004-remove-find-modules.patch" - base_path: "source_subfolder" patch_file: "patches/7.0.0-0005-use-find-package.patch" + - base_path: "source_subfolder" + patch_file: "patches/7.0.0-0006-install-utils.patch" "2.0.0": - base_path: "source_subfolder" patch_file: "patches/2.0.0-0001-cmake.patch" diff --git a/ci/conan/all/conanfile.py b/ci/conan/all/conanfile.py index 97acd839cd3d5..a87478d6e40de 100644 --- a/ci/conan/all/conanfile.py +++ b/ci/conan/all/conanfile.py @@ -23,7 +23,7 @@ from conans import ConanFile, tools, CMake from conans.errors import ConanInvalidConfiguration import os - +import glob required_conan_version = ">=1.33.0" @@ -31,21 +31,22 @@ class ArrowConan(ConanFile): name = "arrow" description = "Apache Arrow is a cross-language development platform for in-memory data" - topics = ("arrow", "memory") + license = ("Apache-2.0",) url = "https://github.com/conan-io/conan-center-index" homepage = "https://arrow.apache.org/" - license = ("Apache-2.0",) - generators = "cmake", "cmake_find_package_multi" - settings = "os", "compiler", "build_type", "arch" + topics = ("memory", "gandiva", "parquet", "skyhook", "plasma", "hdfs", "csv", "cuda", "gcs", "json", "hive", "s3", "grpc") + settings = "os", "arch", "compiler", "build_type" options = { "shared": [True, False], "fPIC": [True, False], "gandiva": [True, False], - "parquet": [True, False], + "parquet": ["auto", True, False], + "substrait": [True, False], + "skyhook": [True, False], "plasma": [True, False], "cli": [True, False], "compute": ["auto", True, False], - "dataset_modules": [True, False], + "dataset_modules": ["auto", True, False], "deprecated": [True, False], 
"encryption": [True, False], "filesystem_layer": [True, False], @@ -56,7 +57,9 @@ class ArrowConan(ConanFile): "with_boost": ["auto", True, False], "with_csv": [True, False], "with_cuda": [True, False], - "with_flight_rpc": [True, False], + "with_flight_rpc": ["auto", True, False], + "with_flight_sql": [True, False], + "with_gcs": [True, False], "with_gflags": ["auto", True, False], "with_glog": ["auto", True, False], "with_grpc": ["auto", True, False], @@ -64,6 +67,7 @@ class ArrowConan(ConanFile): "with_json": [True, False], "with_llvm": ["auto", True, False], "with_openssl": ["auto", True, False], + "with_opentelemetry": [True, False], "with_orc": [True, False], "with_protobuf": ["auto", True, False], "with_re2": ["auto", True, False], @@ -80,11 +84,13 @@ class ArrowConan(ConanFile): "shared": False, "fPIC": True, "gandiva": False, - "parquet": False, + "parquet": "auto", + "skyhook": False, + "substrait": False, "plasma": False, "cli": False, "compute": "auto", - "dataset_modules": False, + "dataset_modules": "auto", "deprecated": True, "encryption": False, "filesystem_layer": False, @@ -97,7 +103,9 @@ class ArrowConan(ConanFile): "with_bz2": False, "with_csv": False, "with_cuda": False, - "with_flight_rpc": False, + "with_flight_rpc": "auto", + "with_flight_sql": False, + "with_gcs": False, "with_gflags": "auto", "with_jemalloc": "auto", "with_glog": "auto", @@ -105,6 +113,7 @@ class ArrowConan(ConanFile): "with_json": False, "with_llvm": "auto", "with_openssl": "auto", + "with_opentelemetry": False, "with_orc": False, "with_protobuf": "auto", "with_re2": "auto", @@ -115,6 +124,8 @@ class ArrowConan(ConanFile): "with_zlib": False, "with_zstd": False, } + generators = "cmake", "cmake_find_package_multi" + short_paths = True _cmake = None @@ -135,6 +146,14 @@ def config_options(self): del self.options.runtime_simd_level elif tools.Version(self.version) < "6.0.0": self.options.simd_level = "sse4_2" + if tools.Version(self.version) < "6.0.0": + del self.options.with_gcs + if tools.Version(self.version) < "7.0.0": + del self.options.skyhook + del self.options.with_flight_sql + del self.options.with_opentelemetry + if tools.Version(self.version) < "8.0.0": + del self.options.substrait def validate(self): if self.settings.compiler == "clang" and self.settings.compiler.version <= tools.Version("3.9"): @@ -144,6 +163,12 @@ def validate(self): del self.options.fPIC if self.options.compute == False and not self._compute(True): raise ConanInvalidConfiguration("compute options is required (or choose auto)") + if self.options.parquet == False and self._parquet(True): + raise ConanInvalidConfiguration("parquet options is required (or choose auto)") + if self.options.dataset_modules == False and self._dataset_modules(True): + raise ConanInvalidConfiguration("dataset_modules options is required (or choose auto)") + if self.options.get_safe("skyhook", False): + raise ConanInvalidConfiguration("CCI has no librados recipe (yet)") if self.options.with_jemalloc == False and self._with_jemalloc(True): raise ConanInvalidConfiguration("with_jemalloc option is required (or choose auto)") if self.options.with_re2 == False and self._with_re2(True): @@ -152,6 +177,8 @@ def validate(self): raise ConanInvalidConfiguration("with_protobuf option is required (or choose auto)") if self.options.with_gflags == False and self._with_gflags(True): raise ConanInvalidConfiguration("with_gflags options is required (or choose auto)") + if self.options.with_flight_rpc == False and self._with_flight_rpc(True): + raise 
ConanInvalidConfiguration("with_flight_rpc options is required (or choose auto)") if self.options.with_grpc == False and self._with_grpc(True): raise ConanInvalidConfiguration("with_grpc options is required (or choose auto)") if self.options.with_boost == False and self._with_boost(True): @@ -176,10 +203,22 @@ def validate(self): def _compute(self, required=False): if required or self.options.compute == "auto": - return bool(self.options.dataset_modules) or bool(self.options.parquet) + return bool(self._parquet()) or bool(self._dataset_modules()) or bool(self.options.get_safe("substrait", False)) else: return bool(self.options.compute) + def _parquet(self, required=False): + if required or self.options.parquet == "auto": + return bool(self.options.get_safe("substrait", False)) + else: + return bool(self.options.parquet) + + def _dataset_modules(self, required=False): + if required or self.options.dataset_modules == "auto": + return bool(self.options.get_safe("substrait", False)) + else: + return bool(self.options.dataset_modules) + def _with_jemalloc(self, required=False): if required or self.options.with_jemalloc == "auto": return bool("BSD" in str(self.settings.os)) @@ -194,10 +233,16 @@ def _with_re2(self, required=False): def _with_protobuf(self, required=False): if required or self.options.with_protobuf == "auto": - return bool(self.options.gandiva or self.options.with_flight_rpc or self.options.with_orc) + return bool(self.options.gandiva or self._with_flight_rpc() or self.options.with_orc or self.options.get_safe("substrait", False)) else: return bool(self.options.with_protobuf) + def _with_flight_rpc(self, required=False): + if required or self.options.with_flight_rpc == "auto": + return bool(self.options.get_safe("with_flight_sql", False)) + else: + return bool(self.options.with_flight_rpc) + def _with_gflags(self, required=False): if required or self.options.with_gflags == "auto": return bool(self.options.plasma or self._with_glog() or self._with_grpc()) @@ -212,7 +257,7 @@ def _with_glog(self, required=False): def _with_grpc(self, required=False): if required or self.options.with_grpc == "auto": - return bool(self.options.with_flight_rpc) + return self._with_flight_rpc() else: return bool(self.options.with_grpc) @@ -222,7 +267,7 @@ def _with_boost(self, required=False): return True version = tools.Version(self.version) if version.major == "1": - if self.options.parquet and self.settings.compiler == "gcc" and self.settings.compiler.version < tools.Version("4.9"): + if self._parquet() and self.settings.compiler == "gcc" and self.settings.compiler.version < tools.Version("4.9"): return True elif version.major >= "2": if self.settings.compiler == "Visual Studio": @@ -233,11 +278,11 @@ def _with_boost(self, required=False): def _with_thrift(self, required=False): # No self.options.with_thift exists - return bool(required or self.options.parquet) + return bool(required or self._parquet()) def _with_utf8proc(self, required=False): if required or self.options.with_utf8proc == "auto": - return False + return bool(self._compute() or self.options.gandiva) else: return bool(self.options.with_utf8proc) @@ -245,19 +290,19 @@ def _with_llvm(self, required=False): if required or self.options.with_llvm == "auto": return bool(self.options.gandiva) else: - return bool(self.options.with_openssl) + return bool(self.options.with_llvm) def _with_openssl(self, required=False): if required or self.options.with_openssl == "auto": - return bool(self.options.encryption or self.options.with_flight_rpc or 
self.options.with_s3) + return bool(self.options.encryption or self._with_flight_rpc() or self.options.with_s3) else: return bool(self.options.with_openssl) def requirements(self): if self._with_thrift(): - self.requires("thrift/0.15.0") + self.requires("thrift/0.16.0") if self._with_protobuf(): - self.requires("protobuf/3.20.0") + self.requires("protobuf/3.21.1") if self._with_jemalloc(): self.requires("jemalloc/5.2.1") if self._with_boost(): @@ -266,18 +311,22 @@ def requirements(self): self.requires("gflags/2.2.2") if self._with_glog(): self.requires("glog/0.6.0") + if self.options.get_safe("with_gcs"): + self.requires("google-cloud-cpp/1.40.1") if self._with_grpc(): - self.requires("grpc/1.45.2") + self.requires("grpc/1.47.0") if self.options.with_json: self.requires("rapidjson/1.1.0") if self._with_llvm(): self.requires("llvm-core/13.0.0") if self._with_openssl(): - # aws-sdk-cpp requires openssl/1.1.1. it uses deprecated functions in openssl/3.0.0 - if self.options.with_s3: - self.requires("openssl/1.1.1o") + # aws-sdk-cpp/grpc requires openssl/1.1.1. it uses deprecated functions in openssl/3.0.0 + if self.options.with_s3 or self._with_flight_rpc(): + self.requires("openssl/1.1.1q") else: - self.requires("openssl/3.0.3") + self.requires("openssl/3.0.5") + if self.options.get_safe("with_opentelemetry"): + self.requires("opentelemetry-cpp/1.4.1") if self.options.with_s3: self.requires("aws-sdk-cpp/1.9.234") if self.options.with_brotli: @@ -312,6 +361,8 @@ def source(self): top_level = os.environ.get("ARROW_HOME") shutil.copytree(os.path.join(top_level, "cpp"), os.path.join(self._source_subfolder, "cpp")) + shutil.copytree(os.path.join(top_level, "format"), + os.path.join(self._source_subfolder, "format")) top_level_files = [ ".env", "LICENSE.txt", @@ -341,9 +392,10 @@ def _configure_cmake(self): self._cmake.definitions["ARROW_DEFINE_OPTIONS"] = True self._cmake.definitions["ARROW_DEPENDENCY_SOURCE"] = "SYSTEM" self._cmake.definitions["ARROW_GANDIVA"] = self.options.gandiva - self._cmake.definitions["ARROW_PARQUET"] = self.options.parquet + self._cmake.definitions["ARROW_PARQUET"] = self._parquet() + self._cmake.definitions["ARROW_SUBSTRAIT"] = self.options.get_safe("substrait", False) self._cmake.definitions["ARROW_PLASMA"] = self.options.plasma - self._cmake.definitions["ARROW_DATASET"] = self.options.dataset_modules + self._cmake.definitions["ARROW_DATASET"] = self._dataset_modules() self._cmake.definitions["ARROW_FILESYSTEM"] = self.options.filesystem_layer self._cmake.definitions["PARQUET_REQUIRE_ENCRYPTION"] = self.options.encryption self._cmake.definitions["ARROW_HDFS"] = self.options.hdfs_bridgs @@ -351,7 +403,8 @@ def _configure_cmake(self): self._cmake.definitions["ARROW_BUILD_SHARED"] = self.options.shared self._cmake.definitions["ARROW_BUILD_STATIC"] = not self.options.shared self._cmake.definitions["ARROW_NO_DEPRECATED_API"] = not self.options.deprecated - self._cmake.definitions["ARROW_FLIGHT"] = self.options.with_flight_rpc + self._cmake.definitions["ARROW_FLIGHT"] = self._with_flight_rpc() + self._cmake.definitions["ARROW_FLIGHT_SQL"] = self.options.get_safe("with_flight_sql", False) self._cmake.definitions["ARROW_COMPUTE"] = self._compute() self._cmake.definitions["ARROW_CSV"] = self.options.with_csv self._cmake.definitions["ARROW_CUDA"] = self.options.with_cuda @@ -444,6 +497,24 @@ def _configure_cmake(self): def _patch_sources(self): for patch in self.conan_data.get("patches", {}).get(self.version, []): tools.patch(**patch) + # if tools.Version(self.version) >= 
"7.0.0": + # for filename in glob.glob(os.path.join(self._source_subfolder, "cpp", "cmake_modules", "Find*.cmake")): + # if os.path.basename(filename) not in [ + # "FindArrow.cmake", + # "FindArrowCUDA.cmake", + # "FindArrowDataset.cmake", + # "FindArrowFlight.cmake", + # "FindArrowFlightSql.cmake", + # "FindArrowFlightTesting.cmake", + # "FindArrowPython.cmake", + # "FindArrowPythonFlight.cmake", + # "FindArrowSubstrait.cmake", + # "FindArrowTesting.cmake", + # "FindGandiva.cmake", + # "FindParquet.cmake", + # "FindPlasma.cmake", + # ]: + # os.remove(filename) def build(self): self._patch_sources() @@ -486,16 +557,23 @@ def package_info(self): self.cpp_info.components["libarrow"].names["pkg_config"] = "arrow" if not self.options.shared: self.cpp_info.components["libarrow"].defines = ["ARROW_STATIC"] - if self.settings.os == "Linux": - self.cpp_info.components["libarrow"].system_libs = ["pthread"] + if self.settings.os in ["Linux", "FreeBSD"]: + self.cpp_info.components["libarrow"].system_libs = ["pthread", "m", "dl", "rt"] - if self.options.parquet: + if self._parquet(): self.cpp_info.components["libparquet"].libs = [self._lib_name("parquet")] self.cpp_info.components["libparquet"].names["cmake_find_package"] = "parquet" self.cpp_info.components["libparquet"].names["cmake_find_package_multi"] = "parquet" self.cpp_info.components["libparquet"].names["pkg_config"] = "parquet" self.cpp_info.components["libparquet"].requires = ["libarrow"] + if self.options.get_safe("substrait", False): + self.cpp_info.components["libarrow_substrait"].libs = [self._lib_name("arrow_substrait")] + self.cpp_info.components["libarrow_substrait"].names["cmake_find_package"] = "arrow_substrait" + self.cpp_info.components["libarrow_substrait"].names["cmake_find_package_multi"] = "arrow_substrait" + self.cpp_info.components["libarrow_substrait"].names["pkg_config"] = "arrow_substrait" + self.cpp_info.components["libarrow_substrait"].requires = ["libparquet", "dataset"] + if self.options.plasma: self.cpp_info.components["libplasma"].libs = [self._lib_name("plasma")] self.cpp_info.components["libplasma"].names["cmake_find_package"] = "plasma" @@ -510,17 +588,24 @@ def package_info(self): self.cpp_info.components["libgandiva"].names["pkg_config"] = "gandiva" self.cpp_info.components["libgandiva"].requires = ["libarrow"] - if self.options.with_flight_rpc: + if self._with_flight_rpc(): self.cpp_info.components["libarrow_flight"].libs = [self._lib_name("arrow_flight")] self.cpp_info.components["libarrow_flight"].names["cmake_find_package"] = "flight_rpc" self.cpp_info.components["libarrow_flight"].names["cmake_find_package_multi"] = "flight_rpc" self.cpp_info.components["libarrow_flight"].names["pkg_config"] = "flight_rpc" self.cpp_info.components["libarrow_flight"].requires = ["libarrow"] - if self.options.dataset_modules: + if self.options.get_safe("with_flight_sql"): + self.cpp_info.components["libarrow_flight_sql"].libs = [self._lib_name("arrow_flight_sql")] + self.cpp_info.components["libarrow_flight_sql"].names["cmake_find_package"] = "flight_sql" + self.cpp_info.components["libarrow_flight_sql"].names["cmake_find_package_multi"] = "flight_sql" + self.cpp_info.components["libarrow_flight_sql"].names["pkg_config"] = "flight_sql" + self.cpp_info.components["libarrow_flight_sql"].requires = ["libarrow", "libarrow_flight"] + + if self._dataset_modules(): self.cpp_info.components["dataset"].libs = ["arrow_dataset"] - if self.options.cli: + if (self.options.cli and (self.options.with_cuda or self._with_flight_rpc() or 
self._parquet())) or self.options.plasma: binpath = os.path.join(self.package_folder, "bin") self.output.info("Appending PATH env var: {}".format(binpath)) self.env_info.PATH.append(binpath) @@ -529,7 +614,7 @@ def package_info(self): if self.options.gandiva: # FIXME: only filesystem component is used self.cpp_info.components["libgandiva"].requires.append("boost::boost") - if self.options.parquet and self.settings.compiler == "gcc" and self.settings.compiler.version < tools.Version("4.9"): + if self._parquet() and self.settings.compiler == "gcc" and self.settings.compiler.version < tools.Version("4.9"): self.cpp_info.components["libparquet"].requires.append("boost::boost") if tools.Version(self.version) >= "2.0": # FIXME: only headers components is used @@ -549,7 +634,7 @@ def package_info(self): if self._with_protobuf(): self.cpp_info.components["libarrow"].requires.append("protobuf::protobuf") if self._with_utf8proc(): - self.cpp_info.components["libarrow"].requires.append("uff8proc::uff8proc") + self.cpp_info.components["libarrow"].requires.append("utf8proc::utf8proc") if self._with_thrift(): self.cpp_info.components["libarrow"].requires.append("thrift::thrift") if self.options.with_backtrace: @@ -560,8 +645,12 @@ def package_info(self): self.cpp_info.components["libarrow"].requires.append("rapidjson::rapidjson") if self.options.with_s3: self.cpp_info.components["libarrow"].requires.append("aws-sdk-cpp::s3") + if self.options.get_safe("with_gcs"): + self.cpp_info.components["libarrow"].requires.append("google-cloud-cpp::storage") if self.options.with_orc: self.cpp_info.components["libarrow"].requires.append("orc::orc") + if self.options.get_safe("with_opentelemetry"): + self.cpp_info.components["libarrow"].requires.append("opentelemetry-cpp::opentelemetry-cpp") if self.options.with_brotli: self.cpp_info.components["libarrow"].requires.append("brotli::brotli") if self.options.with_bz2: @@ -576,6 +665,6 @@ def package_info(self): self.cpp_info.components["libarrow"].requires.append("zlib::zlib") if self.options.with_zstd: self.cpp_info.components["libarrow"].requires.append("zstd::zstd") - if self.options.with_flight_rpc: + if self._with_flight_rpc(): self.cpp_info.components["libarrow_flight"].requires.append("grpc::grpc") self.cpp_info.components["libarrow_flight"].requires.append("protobuf::protobuf") diff --git a/ci/conan/all/patches/7.0.0-0004-remove-find-modules.patch b/ci/conan/all/patches/7.0.0-0004-remove-find-modules.patch index 482bb7cd4d8fd..f0b299479e22f 100644 --- a/ci/conan/all/patches/7.0.0-0004-remove-find-modules.patch +++ b/ci/conan/all/patches/7.0.0-0004-remove-find-modules.patch @@ -20,641 +20,3 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -diff --git a/cpp/cmake_modules/FindBrotli.cmake a/cpp/cmake_modules/FindBrotli.cmake -deleted file mode 100644 -index e2670b5..0000000 ---- a/cpp/cmake_modules/FindBrotli.cmake -+++ /dev/null -@@ -1,130 +0,0 @@ --# --# Licensed under the Apache License, Version 2.0 (the "License"); --# you may not use this file except in compliance with the License. --# You may obtain a copy of the License at --# --# http://www.apache.org/licenses/LICENSE-2.0 --# --# Unless required by applicable law or agreed to in writing, software --# distributed under the License is distributed on an "AS IS" BASIS, --# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
--# See the License for the specific language governing permissions and --# limitations under the License. --# --# Tries to find Brotli headers and libraries. --# --# Usage of this module as follows: --# --# find_package(Brotli) -- --if(ARROW_BROTLI_USE_SHARED) -- set(BROTLI_COMMON_LIB_NAMES -- brotlicommon -- ${CMAKE_SHARED_LIBRARY_PREFIX}brotlicommon${CMAKE_SHARED_LIBRARY_SUFFIX}) -- -- set(BROTLI_ENC_LIB_NAMES -- brotlienc ${CMAKE_SHARED_LIBRARY_PREFIX}brotlienc${CMAKE_SHARED_LIBRARY_SUFFIX}) -- -- set(BROTLI_DEC_LIB_NAMES -- brotlidec ${CMAKE_SHARED_LIBRARY_PREFIX}brotlidec${CMAKE_SHARED_LIBRARY_SUFFIX}) --else() -- set(BROTLI_COMMON_LIB_NAMES -- brotlicommon-static -- ${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon-static${CMAKE_STATIC_LIBRARY_SUFFIX} -- ${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon_static${CMAKE_STATIC_LIBRARY_SUFFIX} -- ${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon${CMAKE_STATIC_LIBRARY_SUFFIX}) -- -- set(BROTLI_ENC_LIB_NAMES -- brotlienc-static -- ${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc-static${CMAKE_STATIC_LIBRARY_SUFFIX} -- ${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc_static${CMAKE_STATIC_LIBRARY_SUFFIX} -- ${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc${CMAKE_STATIC_LIBRARY_SUFFIX}) -- -- set(BROTLI_DEC_LIB_NAMES -- brotlidec-static -- ${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec-static${CMAKE_STATIC_LIBRARY_SUFFIX} -- ${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec_static${CMAKE_STATIC_LIBRARY_SUFFIX} -- ${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec${CMAKE_STATIC_LIBRARY_SUFFIX}) --endif() -- --if(BROTLI_ROOT) -- find_library(BROTLI_COMMON_LIBRARY -- NAMES ${BROTLI_COMMON_LIB_NAMES} -- PATHS ${BROTLI_ROOT} -- PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} -- NO_DEFAULT_PATH) -- find_library(BROTLI_ENC_LIBRARY -- NAMES ${BROTLI_ENC_LIB_NAMES} -- PATHS ${BROTLI_ROOT} -- PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} -- NO_DEFAULT_PATH) -- find_library(BROTLI_DEC_LIBRARY -- NAMES ${BROTLI_DEC_LIB_NAMES} -- PATHS ${BROTLI_ROOT} -- PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} -- NO_DEFAULT_PATH) -- find_path(BROTLI_INCLUDE_DIR -- NAMES brotli/decode.h -- PATHS ${BROTLI_ROOT} -- PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES} -- NO_DEFAULT_PATH) --else() -- find_package(PkgConfig QUIET) -- pkg_check_modules(BROTLI_PC libbrotlicommon libbrotlienc libbrotlidec) -- if(BROTLI_PC_FOUND) -- set(BROTLI_INCLUDE_DIR "${BROTLI_PC_libbrotlicommon_INCLUDEDIR}") -- -- # Some systems (e.g. Fedora) don't fill Brotli_LIBRARY_DIRS, so add the other dirs here. 
-- list(APPEND BROTLI_PC_LIBRARY_DIRS "${BROTLI_PC_libbrotlicommon_LIBDIR}") -- list(APPEND BROTLI_PC_LIBRARY_DIRS "${BROTLI_PC_libbrotlienc_LIBDIR}") -- list(APPEND BROTLI_PC_LIBRARY_DIRS "${BROTLI_PC_libbrotlidec_LIBDIR}") -- -- find_library(BROTLI_COMMON_LIBRARY -- NAMES ${BROTLI_COMMON_LIB_NAMES} -- PATHS ${BROTLI_PC_LIBRARY_DIRS} -- PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} -- NO_DEFAULT_PATH) -- find_library(BROTLI_ENC_LIBRARY -- NAMES ${BROTLI_ENC_LIB_NAMES} -- PATHS ${BROTLI_PC_LIBRARY_DIRS} -- PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} -- NO_DEFAULT_PATH) -- find_library(BROTLI_DEC_LIBRARY -- NAMES ${BROTLI_DEC_LIB_NAMES} -- PATHS ${BROTLI_PC_LIBRARY_DIRS} -- PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} -- NO_DEFAULT_PATH) -- else() -- find_library(BROTLI_COMMON_LIBRARY -- NAMES ${BROTLI_COMMON_LIB_NAMES} -- PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) -- find_library(BROTLI_ENC_LIBRARY -- NAMES ${BROTLI_ENC_LIB_NAMES} -- PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) -- find_library(BROTLI_DEC_LIBRARY -- NAMES ${BROTLI_DEC_LIB_NAMES} -- PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) -- find_path(BROTLI_INCLUDE_DIR -- NAMES brotli/decode.h -- PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) -- endif() --endif() -- --find_package_handle_standard_args( -- Brotli REQUIRED_VARS BROTLI_COMMON_LIBRARY BROTLI_ENC_LIBRARY BROTLI_DEC_LIBRARY -- BROTLI_INCLUDE_DIR) --if(Brotli_FOUND OR BROTLI_FOUND) -- set(Brotli_FOUND TRUE) -- add_library(Brotli::brotlicommon UNKNOWN IMPORTED) -- set_target_properties(Brotli::brotlicommon -- PROPERTIES IMPORTED_LOCATION "${BROTLI_COMMON_LIBRARY}" -- INTERFACE_INCLUDE_DIRECTORIES "${BROTLI_INCLUDE_DIR}") -- add_library(Brotli::brotlienc UNKNOWN IMPORTED) -- set_target_properties(Brotli::brotlienc -- PROPERTIES IMPORTED_LOCATION "${BROTLI_ENC_LIBRARY}" -- INTERFACE_INCLUDE_DIRECTORIES "${BROTLI_INCLUDE_DIR}") -- add_library(Brotli::brotlidec UNKNOWN IMPORTED) -- set_target_properties(Brotli::brotlidec -- PROPERTIES IMPORTED_LOCATION "${BROTLI_DEC_LIBRARY}" -- INTERFACE_INCLUDE_DIRECTORIES "${BROTLI_INCLUDE_DIR}") --endif() -diff --git a/cpp/cmake_modules/FindLz4.cmake a/cpp/cmake_modules/FindLz4.cmake -deleted file mode 100644 -index bc8051f..0000000 ---- a/cpp/cmake_modules/FindLz4.cmake -+++ /dev/null -@@ -1,84 +0,0 @@ --# Licensed to the Apache Software Foundation (ASF) under one --# or more contributor license agreements. See the NOTICE file --# distributed with this work for additional information --# regarding copyright ownership. The ASF licenses this file --# to you under the Apache License, Version 2.0 (the --# "License"); you may not use this file except in compliance --# with the License. You may obtain a copy of the License at --# --# http://www.apache.org/licenses/LICENSE-2.0 --# --# Unless required by applicable law or agreed to in writing, --# software distributed under the License is distributed on an --# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --# KIND, either express or implied. See the License for the --# specific language governing permissions and limitations --# under the License. 
-- --if(MSVC_TOOLCHAIN AND NOT DEFINED LZ4_MSVC_LIB_PREFIX) -- set(LZ4_MSVC_LIB_PREFIX "lib") --endif() --set(LZ4_LIB_NAME_BASE "${LZ4_MSVC_LIB_PREFIX}lz4") -- --if(ARROW_LZ4_USE_SHARED) -- set(LZ4_LIB_NAMES) -- if(CMAKE_IMPORT_LIBRARY_SUFFIX) -- list(APPEND -- LZ4_LIB_NAMES -- "${CMAKE_IMPORT_LIBRARY_PREFIX}${LZ4_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}" -- ) -- endif() -- list(APPEND LZ4_LIB_NAMES -- "${CMAKE_SHARED_LIBRARY_PREFIX}${LZ4_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}") --else() -- if(MSVC AND NOT DEFINED LZ4_MSVC_STATIC_LIB_SUFFIX) -- set(LZ4_MSVC_STATIC_LIB_SUFFIX "_static") -- endif() -- set(LZ4_STATIC_LIB_SUFFIX "${LZ4_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}") -- set(LZ4_LIB_NAMES -- "${CMAKE_STATIC_LIBRARY_PREFIX}${LZ4_LIB_NAME_BASE}${LZ4_STATIC_LIB_SUFFIX}") --endif() -- --if(LZ4_ROOT) -- find_library(LZ4_LIB -- NAMES ${LZ4_LIB_NAMES} -- PATHS ${LZ4_ROOT} -- PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} -- NO_DEFAULT_PATH) -- find_path(LZ4_INCLUDE_DIR -- NAMES lz4.h -- PATHS ${LZ4_ROOT} -- NO_DEFAULT_PATH -- PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) -- --else() -- find_package(PkgConfig QUIET) -- pkg_check_modules(LZ4_PC liblz4) -- if(LZ4_PC_FOUND) -- set(LZ4_INCLUDE_DIR "${LZ4_PC_INCLUDEDIR}") -- -- list(APPEND LZ4_PC_LIBRARY_DIRS "${LZ4_PC_LIBDIR}") -- find_library(LZ4_LIB -- NAMES ${LZ4_LIB_NAMES} -- PATHS ${LZ4_PC_LIBRARY_DIRS} -- NO_DEFAULT_PATH -- PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) -- else() -- find_library(LZ4_LIB -- NAMES ${LZ4_LIB_NAMES} -- PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) -- find_path(LZ4_INCLUDE_DIR -- NAMES lz4.h -- PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) -- endif() --endif() -- --find_package_handle_standard_args(Lz4 REQUIRED_VARS LZ4_LIB LZ4_INCLUDE_DIR) -- --if(Lz4_FOUND) -- set(Lz4_FOUND TRUE) -- add_library(LZ4::lz4 UNKNOWN IMPORTED) -- set_target_properties(LZ4::lz4 -- PROPERTIES IMPORTED_LOCATION "${LZ4_LIB}" -- INTERFACE_INCLUDE_DIRECTORIES "${LZ4_INCLUDE_DIR}") --endif() -diff --git a/cpp/cmake_modules/FindSnappy.cmake a/cpp/cmake_modules/FindSnappy.cmake -deleted file mode 100644 -index 747df31..0000000 ---- a/cpp/cmake_modules/FindSnappy.cmake -+++ /dev/null -@@ -1,62 +0,0 @@ --# Licensed to the Apache Software Foundation (ASF) under one --# or more contributor license agreements. See the NOTICE file --# distributed with this work for additional information --# regarding copyright ownership. The ASF licenses this file --# to you under the Apache License, Version 2.0 (the --# "License"); you may not use this file except in compliance --# with the License. You may obtain a copy of the License at --# --# http://www.apache.org/licenses/LICENSE-2.0 --# --# Unless required by applicable law or agreed to in writing, --# software distributed under the License is distributed on an --# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --# KIND, either express or implied. See the License for the --# specific language governing permissions and limitations --# under the License. 
-- --if(ARROW_SNAPPY_USE_SHARED) -- set(SNAPPY_LIB_NAMES) -- if(CMAKE_IMPORT_LIBRARY_SUFFIX) -- list(APPEND SNAPPY_LIB_NAMES -- "${CMAKE_IMPORT_LIBRARY_PREFIX}snappy${CMAKE_IMPORT_LIBRARY_SUFFIX}") -- endif() -- list(APPEND SNAPPY_LIB_NAMES -- "${CMAKE_SHARED_LIBRARY_PREFIX}snappy${CMAKE_SHARED_LIBRARY_SUFFIX}") --else() -- set(SNAPPY_STATIC_LIB_NAME_BASE "snappy") -- if(MSVC) -- set(SNAPPY_STATIC_LIB_NAME_BASE -- "${SNAPPY_STATIC_LIB_NAME_BASE}${SNAPPY_MSVC_STATIC_LIB_SUFFIX}") -- endif() -- set(SNAPPY_LIB_NAMES -- "${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_STATIC_LIB_NAME_BASE}${CMAKE_STATIC_LIBRARY_SUFFIX}" -- ) --endif() -- --if(Snappy_ROOT) -- find_library(Snappy_LIB -- NAMES ${SNAPPY_LIB_NAMES} -- PATHS ${Snappy_ROOT} -- PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} -- NO_DEFAULT_PATH) -- find_path(Snappy_INCLUDE_DIR -- NAMES snappy.h -- PATHS ${Snappy_ROOT} -- NO_DEFAULT_PATH -- PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) --else() -- find_library(Snappy_LIB NAMES ${SNAPPY_LIB_NAMES}) -- find_path(Snappy_INCLUDE_DIR -- NAMES snappy.h -- PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) --endif() -- --find_package_handle_standard_args(Snappy REQUIRED_VARS Snappy_LIB Snappy_INCLUDE_DIR) -- --if(Snappy_FOUND) -- add_library(Snappy::snappy UNKNOWN IMPORTED) -- set_target_properties(Snappy::snappy -- PROPERTIES IMPORTED_LOCATION "${Snappy_LIB}" -- INTERFACE_INCLUDE_DIRECTORIES "${Snappy_INCLUDE_DIR}") --endif() -diff --git a/cpp/cmake_modules/FindThrift.cmake b/cpp/cmake_modules/FindThrift.cmake -index 750d8ce..e69de29 100644 ---- a/cpp/cmake_modules/FindThrift.cmake -+++ b/cpp/cmake_modules/FindThrift.cmake -@@ -1,144 +0,0 @@ --# Copyright 2012 Cloudera Inc. --# --# Licensed under the Apache License, Version 2.0 (the "License"); --# you may not use this file except in compliance with the License. --# You may obtain a copy of the License at --# --# http://www.apache.org/licenses/LICENSE-2.0 --# --# Unless required by applicable law or agreed to in writing, software --# distributed under the License is distributed on an "AS IS" BASIS, --# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --# See the License for the specific language governing permissions and --# limitations under the License. -- --# - Find Thrift (a cross platform RPC lib/tool) --# --# Variables used by this module, they can change the default behaviour and need --# to be set before calling find_package: --# --# Thrift_ROOT - When set, this path is inspected instead of standard library --# locations as the root of the Thrift installation. --# The environment variable THRIFT_HOME overrides this variable. 
--# --# This module defines --# THRIFT_VERSION, version string of ant if found --# THRIFT_INCLUDE_DIR, where to find THRIFT headers --# THRIFT_LIB, THRIFT library --# THRIFT_FOUND, If false, do not try to use ant -- --function(EXTRACT_THRIFT_VERSION) -- if(THRIFT_INCLUDE_DIR) -- file(READ "${THRIFT_INCLUDE_DIR}/thrift/config.h" THRIFT_CONFIG_H_CONTENT) -- string(REGEX MATCH "#define PACKAGE_VERSION \"[0-9.]+\"" THRIFT_VERSION_DEFINITION -- "${THRIFT_CONFIG_H_CONTENT}") -- string(REGEX MATCH "[0-9.]+" THRIFT_VERSION "${THRIFT_VERSION_DEFINITION}") -- set(THRIFT_VERSION -- "${THRIFT_VERSION}" -- PARENT_SCOPE) -- else() -- set(THRIFT_VERSION -- "" -- PARENT_SCOPE) -- endif() --endfunction(EXTRACT_THRIFT_VERSION) -- --if(MSVC_TOOLCHAIN AND NOT DEFINED THRIFT_MSVC_LIB_SUFFIX) -- if(NOT ARROW_THRIFT_USE_SHARED) -- if(ARROW_USE_STATIC_CRT) -- set(THRIFT_MSVC_LIB_SUFFIX "mt") -- else() -- set(THRIFT_MSVC_LIB_SUFFIX "md") -- endif() -- endif() --endif() --set(THRIFT_LIB_NAME_BASE "thrift${THRIFT_MSVC_LIB_SUFFIX}") -- --if(ARROW_THRIFT_USE_SHARED) -- set(THRIFT_LIB_NAMES thrift) -- if(CMAKE_IMPORT_LIBRARY_SUFFIX) -- list(APPEND -- THRIFT_LIB_NAMES -- "${CMAKE_IMPORT_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}" -- ) -- endif() -- list(APPEND -- THRIFT_LIB_NAMES -- "${CMAKE_SHARED_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}" -- ) --else() -- set(THRIFT_LIB_NAMES -- "${CMAKE_STATIC_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_STATIC_LIBRARY_SUFFIX}" -- ) --endif() -- --if(Thrift_ROOT) -- find_library(THRIFT_LIB -- NAMES ${THRIFT_LIB_NAMES} -- PATHS ${Thrift_ROOT} -- PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib") -- find_path(THRIFT_INCLUDE_DIR thrift/Thrift.h -- PATHS ${Thrift_ROOT} -- PATH_SUFFIXES "include") -- find_program(THRIFT_COMPILER thrift -- PATHS ${Thrift_ROOT} -- PATH_SUFFIXES "bin") -- extract_thrift_version() --else() -- # THRIFT-4760: The pkgconfig files are currently only installed when using autotools. -- # Starting with 0.13, they are also installed for the CMake-based installations of Thrift. 
-- find_package(PkgConfig QUIET) -- pkg_check_modules(THRIFT_PC thrift) -- if(THRIFT_PC_FOUND) -- set(THRIFT_INCLUDE_DIR "${THRIFT_PC_INCLUDEDIR}") -- -- list(APPEND THRIFT_PC_LIBRARY_DIRS "${THRIFT_PC_LIBDIR}") -- -- find_library(THRIFT_LIB -- NAMES ${THRIFT_LIB_NAMES} -- PATHS ${THRIFT_PC_LIBRARY_DIRS} -- NO_DEFAULT_PATH) -- find_program(THRIFT_COMPILER thrift -- HINTS ${THRIFT_PC_PREFIX} -- NO_DEFAULT_PATH -- PATH_SUFFIXES "bin") -- set(THRIFT_VERSION ${THRIFT_PC_VERSION}) -- else() -- find_library(THRIFT_LIB -- NAMES ${THRIFT_LIB_NAMES} -- PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib") -- find_path(THRIFT_INCLUDE_DIR thrift/Thrift.h PATH_SUFFIXES "include") -- find_program(THRIFT_COMPILER thrift PATH_SUFFIXES "bin") -- extract_thrift_version() -- endif() --endif() -- --if(THRIFT_COMPILER) -- set(Thrift_COMPILER_FOUND TRUE) --else() -- set(Thrift_COMPILER_FOUND FALSE) --endif() -- --find_package_handle_standard_args( -- Thrift -- REQUIRED_VARS THRIFT_LIB THRIFT_INCLUDE_DIR -- VERSION_VAR THRIFT_VERSION -- HANDLE_COMPONENTS) -- --if(Thrift_FOUND OR THRIFT_FOUND) -- set(Thrift_FOUND TRUE) -- if(ARROW_THRIFT_USE_SHARED) -- add_library(thrift::thrift SHARED IMPORTED) -- else() -- add_library(thrift::thrift STATIC IMPORTED) -- endif() -- set_target_properties(thrift::thrift -- PROPERTIES IMPORTED_LOCATION "${THRIFT_LIB}" -- INTERFACE_INCLUDE_DIRECTORIES "${THRIFT_INCLUDE_DIR}") -- if(WIN32 AND NOT MSVC_TOOLCHAIN) -- # We don't need this for Visual C++ because Thrift uses -- # "#pragma comment(lib, "Ws2_32.lib")" in -- # thrift/windows/config.h for Visual C++. -- set_target_properties(thrift::thrift PROPERTIES INTERFACE_LINK_LIBRARIES "ws2_32") -- endif() --endif() -diff --git a/cpp/cmake_modules/Findjemalloc.cmake a/cpp/cmake_modules/Findjemalloc.cmake -deleted file mode 100644 -index 84bb81f..0000000 ---- a/cpp/cmake_modules/Findjemalloc.cmake -+++ /dev/null -@@ -1,94 +0,0 @@ --# --# Licensed under the Apache License, Version 2.0 (the "License"); --# you may not use this file except in compliance with the License. --# You may obtain a copy of the License at --# --# http://www.apache.org/licenses/LICENSE-2.0 --# --# Unless required by applicable law or agreed to in writing, software --# distributed under the License is distributed on an "AS IS" BASIS, --# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --# See the License for the specific language governing permissions and --# limitations under the License. --# --# Tries to find jemalloc headers and libraries. --# --# Usage of this module as follows: --# --# find_package(jemalloc) --# --# Variables used by this module, they can change the default behaviour and need --# to be set before calling find_package: --# --# JEMALLOC_HOME - --# When set, this path is inspected instead of standard library locations as --# the root of the jemalloc installation. The environment variable --# JEMALLOC_HOME overrides this veriable. 
--# --# This module defines --# JEMALLOC_INCLUDE_DIR, directory containing headers --# JEMALLOC_SHARED_LIB, path to libjemalloc.so/dylib --# JEMALLOC_FOUND, whether flatbuffers has been found -- --if(NOT "${JEMALLOC_HOME}" STREQUAL "") -- file(TO_CMAKE_PATH "${JEMALLOC_HOME}" _native_path) -- list(APPEND _jemalloc_roots ${_native_path}) --elseif(JEMALLOC_HOME) -- list(APPEND _jemalloc_roots ${JEMALLOC_HOME}) --endif() -- --set(LIBJEMALLOC_NAMES jemalloc libjemalloc.so.1 libjemalloc.so.2 libjemalloc.dylib) -- --# Try the parameterized roots, if they exist --if(_jemalloc_roots) -- find_path(JEMALLOC_INCLUDE_DIR -- NAMES jemalloc/jemalloc.h -- PATHS ${_jemalloc_roots} -- NO_DEFAULT_PATH -- PATH_SUFFIXES "include") -- find_library(JEMALLOC_SHARED_LIB -- NAMES ${LIBJEMALLOC_NAMES} -- PATHS ${_jemalloc_roots} -- NO_DEFAULT_PATH -- PATH_SUFFIXES "lib") -- find_library(JEMALLOC_STATIC_LIB -- NAMES jemalloc_pic -- PATHS ${_jemalloc_roots} -- NO_DEFAULT_PATH -- PATH_SUFFIXES "lib") --else() -- find_path(JEMALLOC_INCLUDE_DIR NAMES jemalloc/jemalloc.h) -- message(STATUS ${JEMALLOC_INCLUDE_DIR}) -- find_library(JEMALLOC_SHARED_LIB NAMES ${LIBJEMALLOC_NAMES}) -- message(STATUS ${JEMALLOC_SHARED_LIB}) -- find_library(JEMALLOC_STATIC_LIB NAMES jemalloc_pic) -- message(STATUS ${JEMALLOC_STATIC_LIB}) --endif() -- --if(JEMALLOC_INCLUDE_DIR AND JEMALLOC_SHARED_LIB) -- set(JEMALLOC_FOUND TRUE) --else() -- set(JEMALLOC_FOUND FALSE) --endif() -- --if(JEMALLOC_FOUND) -- if(NOT jemalloc_FIND_QUIETLY) -- message(STATUS "Found the jemalloc library: ${JEMALLOC_LIBRARIES}") -- endif() --else() -- if(NOT jemalloc_FIND_QUIETLY) -- set(JEMALLOC_ERR_MSG "Could not find the jemalloc library. Looked in ") -- if(_flatbuffers_roots) -- set(JEMALLOC_ERR_MSG "${JEMALLOC_ERR_MSG} in ${_jemalloc_roots}.") -- else() -- set(JEMALLOC_ERR_MSG "${JEMALLOC_ERR_MSG} system search paths.") -- endif() -- if(jemalloc_FIND_REQUIRED) -- message(FATAL_ERROR "${JEMALLOC_ERR_MSG}") -- else(jemalloc_FIND_REQUIRED) -- message(STATUS "${JEMALLOC_ERR_MSG}") -- endif(jemalloc_FIND_REQUIRED) -- endif() --endif() -- --mark_as_advanced(JEMALLOC_INCLUDE_DIR JEMALLOC_SHARED_LIB) -diff --git a/cpp/cmake_modules/Findzstd.cmake a/cpp/cmake_modules/Findzstd.cmake -deleted file mode 100644 -index 3fc14ec..0000000 ---- a/cpp/cmake_modules/Findzstd.cmake -+++ /dev/null -@@ -1,89 +0,0 @@ --# Licensed to the Apache Software Foundation (ASF) under one --# or more contributor license agreements. See the NOTICE file --# distributed with this work for additional information --# regarding copyright ownership. The ASF licenses this file --# to you under the Apache License, Version 2.0 (the --# "License"); you may not use this file except in compliance --# with the License. You may obtain a copy of the License at --# --# http://www.apache.org/licenses/LICENSE-2.0 --# --# Unless required by applicable law or agreed to in writing, --# software distributed under the License is distributed on an --# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --# KIND, either express or implied. See the License for the --# specific language governing permissions and limitations --# under the License. 
-- --if(MSVC AND NOT DEFINED ZSTD_MSVC_LIB_PREFIX) -- set(ZSTD_MSVC_LIB_PREFIX "lib") --endif() --set(ZSTD_LIB_NAME_BASE "${ZSTD_MSVC_LIB_PREFIX}zstd") -- --if(ARROW_ZSTD_USE_SHARED) -- set(ZSTD_LIB_NAMES) -- if(CMAKE_IMPORT_LIBRARY_SUFFIX) -- list(APPEND -- ZSTD_LIB_NAMES -- "${CMAKE_IMPORT_LIBRARY_PREFIX}${ZSTD_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}" -- ) -- endif() -- list(APPEND ZSTD_LIB_NAMES -- "${CMAKE_SHARED_LIBRARY_PREFIX}${ZSTD_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}" -- ) --else() -- if(MSVC AND NOT DEFINED ZSTD_MSVC_STATIC_LIB_SUFFIX) -- set(ZSTD_MSVC_STATIC_LIB_SUFFIX "_static") -- endif() -- set(ZSTD_STATIC_LIB_SUFFIX -- "${ZSTD_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}") -- set(ZSTD_LIB_NAMES -- "${CMAKE_STATIC_LIBRARY_PREFIX}${ZSTD_LIB_NAME_BASE}${ZSTD_STATIC_LIB_SUFFIX}") --endif() -- --# First, find via if specified ZSTD_ROOT --if(ZSTD_ROOT) -- message(STATUS "Using ZSTD_ROOT: ${ZSTD_ROOT}") -- find_library(ZSTD_LIB -- NAMES ${ZSTD_LIB_NAMES} -- PATHS ${ZSTD_ROOT} -- PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES} -- NO_DEFAULT_PATH) -- find_path(ZSTD_INCLUDE_DIR -- NAMES zstd.h -- PATHS ${ZSTD_ROOT} -- NO_DEFAULT_PATH -- PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) -- --else() -- # Second, find via pkg_check_modules -- find_package(PkgConfig QUIET) -- pkg_check_modules(ZSTD_PC libzstd) -- if(ZSTD_PC_FOUND) -- set(ZSTD_INCLUDE_DIR "${ZSTD_PC_INCLUDEDIR}") -- -- list(APPEND ZSTD_PC_LIBRARY_DIRS "${ZSTD_PC_LIBDIR}") -- find_library(ZSTD_LIB -- NAMES ${ZSTD_LIB_NAMES} -- PATHS ${ZSTD_PC_LIBRARY_DIRS} -- NO_DEFAULT_PATH -- PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) -- else() -- # Third, check all other CMake paths -- find_library(ZSTD_LIB -- NAMES ${ZSTD_LIB_NAMES} -- PATH_SUFFIXES ${ARROW_LIBRARY_PATH_SUFFIXES}) -- find_path(ZSTD_INCLUDE_DIR -- NAMES zstd.h -- PATH_SUFFIXES ${ARROW_INCLUDE_PATH_SUFFIXES}) -- endif() --endif() -- --find_package_handle_standard_args(zstd REQUIRED_VARS ZSTD_LIB ZSTD_INCLUDE_DIR) -- --if(zstd_FOUND) -- add_library(zstd::libzstd UNKNOWN IMPORTED) -- set_target_properties(zstd::libzstd -- PROPERTIES IMPORTED_LOCATION "${ZSTD_LIB}" -- INTERFACE_INCLUDE_DIRECTORIES "${ZSTD_INCLUDE_DIR}") --endif() diff --git a/ci/conan/all/patches/7.0.0-0005-use-find-package.patch b/ci/conan/all/patches/7.0.0-0005-use-find-package.patch index 085871aaea381..0759339c23ac7 100644 --- a/ci/conan/all/patches/7.0.0-0005-use-find-package.patch +++ b/ci/conan/all/patches/7.0.0-0005-use-find-package.patch @@ -45,7 +45,7 @@ index 2d7baf1..c2e86e0 100644 if(ARROW_MIMALLOC) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake -index bc38952..ff81d00 100644 +index bc38952..84fc279 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -953,14 +953,7 @@ else() @@ -90,7 +90,23 @@ index bc38952..ff81d00 100644 # TODO: Don't use global includes but rather target_include_directories get_target_property(BROTLI_INCLUDE_DIR Brotli::brotlicommon INTERFACE_INCLUDE_DIRECTORIES) -@@ -1228,10 +1224,13 @@ macro(build_glog) +@@ -1156,6 +1152,15 @@ if(PARQUET_REQUIRE_ENCRYPTION + set(OpenSSL_USE_STATIC_LIBS ON) + set(OPENSSL_USE_STATIC_LIBS ON) + find_package(OpenSSL ${ARROW_OPENSSL_REQUIRED_VERSION} REQUIRED) ++ find_package(OpenSSL REQUIRED CONFIG) ++ message("OPENSSL_FOUND: ${OPENSSL_FOUND}") ++ message("OPENSSL_INCLUDE_DIR: ${OPENSSL_INCLUDE_DIR}") ++ message("OPENSSL_CRYPTO_LIBRARY: ${OPENSSL_CRYPTO_LIBRARY}") ++ message("OPENSSL_CRYPTO_LIBRARIES: 
${OPENSSL_CRYPTO_LIBRARIES}") ++ message("OPENSSL_SSL_LIBRARY: ${OPENSSL_SSL_LIBRARY}") ++ message("OPENSSL_SSL_LIBRARIES: ${OPENSSL_SSL_LIBRARIES}") ++ message("OPENSSL_LIBRARIES: ${OPENSSL_LIBRARIES}") ++ message("OPENSSL_VERSION: ${OPENSSL_VERSION}") + endif() + set(ARROW_USE_OPENSSL ON) + endif() +@@ -1228,10 +1233,13 @@ macro(build_glog) endmacro() if(ARROW_USE_GLOG) @@ -104,7 +120,7 @@ index bc38952..ff81d00 100644 endif() # ---------------------------------------------------------------------- -@@ -1300,14 +1299,7 @@ macro(build_gflags) +@@ -1300,17 +1308,11 @@ macro(build_gflags) endmacro() if(ARROW_NEED_GFLAGS) @@ -120,7 +136,22 @@ index bc38952..ff81d00 100644 # TODO: Don't use global includes but rather target_include_directories include_directories(SYSTEM ${GFLAGS_INCLUDE_DIR}) -@@ -1400,6 +1392,7 @@ macro(build_thrift) ++if(0) + if(NOT TARGET ${GFLAGS_LIBRARIES}) + if(TARGET gflags-shared) + set(GFLAGS_LIBRARIES gflags-shared) +@@ -1318,6 +1320,10 @@ if(ARROW_NEED_GFLAGS) + set(GFLAGS_LIBRARIES gflags_shared) + endif() + endif() ++else() ++ set(GFLAGS_LIBRARIES gflags::gflags) ++endif() ++ + endif() + + # ---------------------------------------------------------------------- +@@ -1400,6 +1406,7 @@ macro(build_thrift) endmacro() if(ARROW_WITH_THRIFT) @@ -128,7 +159,7 @@ index bc38952..ff81d00 100644 # We already may have looked for Thrift earlier, when considering whether # to build Boost, so don't look again if already found. if(NOT Thrift_FOUND) -@@ -1412,6 +1405,9 @@ if(ARROW_WITH_THRIFT) +@@ -1412,6 +1419,9 @@ if(ARROW_WITH_THRIFT) endif() # TODO: Don't use global includes but rather target_include_directories include_directories(SYSTEM ${THRIFT_INCLUDE_DIR}) @@ -138,7 +169,7 @@ index bc38952..ff81d00 100644 string(REPLACE "." ";" VERSION_LIST ${THRIFT_VERSION}) list(GET VERSION_LIST 0 THRIFT_VERSION_MAJOR) -@@ -1606,7 +1602,7 @@ if(ARROW_JEMALLOC) +@@ -1606,7 +1616,7 @@ if(ARROW_JEMALLOC) # conflict with the default allocator as well as other jemalloc # installations. # find_package(jemalloc) @@ -147,7 +178,7 @@ index bc38952..ff81d00 100644 set(ARROW_JEMALLOC_USE_SHARED OFF) set(JEMALLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/jemalloc_ep-prefix/src/jemalloc_ep/dist/") -@@ -1664,6 +1660,9 @@ if(ARROW_JEMALLOC) +@@ -1664,6 +1674,9 @@ if(ARROW_JEMALLOC) "${CMAKE_CURRENT_BINARY_DIR}/jemalloc_ep-prefix/src") add_dependencies(jemalloc::jemalloc jemalloc_ep) @@ -157,7 +188,7 @@ index bc38952..ff81d00 100644 list(APPEND ARROW_BUNDLED_STATIC_LIBS jemalloc::jemalloc) endif() -@@ -1671,6 +1670,8 @@ endif() +@@ -1671,6 +1684,8 @@ endif() # mimalloc - Cross-platform high-performance allocator, from Microsoft if(ARROW_MIMALLOC) @@ -166,7 +197,7 @@ index bc38952..ff81d00 100644 message(STATUS "Building (vendored) mimalloc from source") # We only use a vendored mimalloc as we want to control its build options. 
-@@ -1715,6 +1716,13 @@ if(ARROW_MIMALLOC) +@@ -1715,6 +1730,13 @@ if(ARROW_MIMALLOC) add_dependencies(mimalloc::mimalloc mimalloc_ep) add_dependencies(toolchain mimalloc_ep) @@ -180,7 +211,26 @@ index bc38952..ff81d00 100644 list(APPEND ARROW_BUNDLED_STATIC_LIBS mimalloc::mimalloc) endif() -@@ -2036,10 +2044,21 @@ macro(build_xsimd) +@@ -1999,6 +2021,7 @@ macro(build_rapidjson) + endmacro() + + if(ARROW_WITH_RAPIDJSON) ++if(0) + set(ARROW_RAPIDJSON_REQUIRED_VERSION "1.1.0") + resolve_dependency(RapidJSON + HAVE_ALT +@@ -2011,6 +2034,10 @@ if(ARROW_WITH_RAPIDJSON) + if(RapidJSON_INCLUDE_DIR) + set(RAPIDJSON_INCLUDE_DIR "${RapidJSON_INCLUDE_DIR}") + endif() ++else() ++ find_package(RapidJSON REQUIRED) ++ set(RAPIDJSON_INCLUDE_DIR "${RapidJSON_INCLUDE_DIR}") ++endif() + + # TODO: Don't use global includes but rather target_include_directories + include_directories(SYSTEM ${RAPIDJSON_INCLUDE_DIR}) +@@ -2036,10 +2063,21 @@ macro(build_xsimd) set(XSIMD_VENDORED TRUE) endmacro() @@ -204,7 +254,7 @@ index bc38952..ff81d00 100644 # TODO: Don't use global includes but rather target_include_directories include_directories(SYSTEM ${XSIMD_INCLUDE_DIR}) endif() -@@ -2082,11 +2101,14 @@ macro(build_zlib) +@@ -2082,11 +2120,14 @@ macro(build_zlib) endmacro() if(ARROW_WITH_ZLIB) @@ -219,7 +269,7 @@ index bc38952..ff81d00 100644 endif() macro(build_lz4) -@@ -2140,11 +2162,14 @@ macro(build_lz4) +@@ -2140,11 +2181,14 @@ macro(build_lz4) endmacro() if(ARROW_WITH_LZ4) @@ -234,7 +284,7 @@ index bc38952..ff81d00 100644 endif() macro(build_zstd) -@@ -2205,6 +2230,7 @@ macro(build_zstd) +@@ -2205,6 +2249,7 @@ macro(build_zstd) endmacro() if(ARROW_WITH_ZSTD) @@ -242,7 +292,7 @@ index bc38952..ff81d00 100644 # ARROW-13384: ZSTD_minCLevel was added in v1.4.0, required by ARROW-13091 resolve_dependency(zstd PC_PACKAGE_NAMES -@@ -2232,6 +2258,8 @@ if(ARROW_WITH_ZSTD) +@@ -2232,6 +2277,8 @@ if(ARROW_WITH_ZSTD) get_target_property(ZSTD_INCLUDE_DIR ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES) include_directories(SYSTEM ${ZSTD_INCLUDE_DIR}) @@ -251,7 +301,7 @@ index bc38952..ff81d00 100644 endif() # ---------------------------------------------------------------------- -@@ -2271,6 +2299,7 @@ macro(build_re2) +@@ -2271,6 +2318,7 @@ macro(build_re2) endmacro() if(ARROW_WITH_RE2) @@ -259,7 +309,7 @@ index bc38952..ff81d00 100644 # Don't specify "PC_PACKAGE_NAMES re2" here because re2.pc may # include -std=c++11. It's not compatible with C source and C++ # source not uses C++ 11. 
-@@ -2284,6 +2313,8 @@ if(ARROW_WITH_RE2) +@@ -2284,6 +2332,8 @@ if(ARROW_WITH_RE2) # TODO: Don't use global includes but rather target_include_directories get_target_property(RE2_INCLUDE_DIR re2::re2 INTERFACE_INCLUDE_DIRECTORIES) include_directories(SYSTEM ${RE2_INCLUDE_DIR}) @@ -268,7 +318,7 @@ index bc38952..ff81d00 100644 endif() macro(build_bzip2) -@@ -2335,10 +2366,7 @@ macro(build_bzip2) +@@ -2335,10 +2385,7 @@ macro(build_bzip2) endmacro() if(ARROW_WITH_BZ2) @@ -280,7 +330,7 @@ index bc38952..ff81d00 100644 if(NOT TARGET BZip2::BZip2) add_library(BZip2::BZip2 UNKNOWN IMPORTED) -@@ -2390,11 +2418,7 @@ macro(build_utf8proc) +@@ -2390,11 +2437,7 @@ macro(build_utf8proc) endmacro() if(ARROW_WITH_UTF8PROC) @@ -293,7 +343,7 @@ index bc38952..ff81d00 100644 add_definitions(-DARROW_WITH_UTF8PROC) -@@ -3554,33 +3578,12 @@ if(ARROW_WITH_GRPC) +@@ -3554,33 +3597,12 @@ if(ARROW_WITH_GRPC) message(STATUS "Forcing gRPC_SOURCE to Protobuf_SOURCE (${Protobuf_SOURCE})") set(gRPC_SOURCE "${Protobuf_SOURCE}") endif() @@ -328,6 +378,35 @@ index bc38952..ff81d00 100644 endif() # ---------------------------------------------------------------------- +@@ -3770,7 +3792,12 @@ macro(build_google_cloud_cpp_storage) + endmacro() + + if(ARROW_WITH_GOOGLE_CLOUD_CPP) ++if(0) + resolve_dependency(google_cloud_cpp_storage) ++else() ++ find_package(google-cloud-cpp REQUIRED) ++endif() ++ + get_target_property(google_cloud_cpp_storage_INCLUDE_DIR google-cloud-cpp::storage + INTERFACE_INCLUDE_DIRECTORIES) + include_directories(SYSTEM ${google_cloud_cpp_storage_INCLUDE_DIR}) +@@ -4097,11 +4124,15 @@ macro(build_opentelemetry) + endmacro() + + if(ARROW_WITH_OPENTELEMETRY) ++if(0) + set(opentelemetry-cpp_SOURCE "AUTO") + resolve_dependency(opentelemetry-cpp) + get_target_property(OPENTELEMETRY_INCLUDE_DIR opentelemetry-cpp::api + INTERFACE_INCLUDE_DIRECTORIES) + include_directories(SYSTEM ${OPENTELEMETRY_INCLUDE_DIR}) ++else() ++ find_package(opentelemetry-cpp REQUIRED) ++endif() + message(STATUS "Found OpenTelemetry headers: ${OPENTELEMETRY_INCLUDE_DIR}") + endif() + diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 84975e2..7779c08 100644 --- a/cpp/src/arrow/CMakeLists.txt diff --git a/ci/conan/all/patches/7.0.0-0006-install-utils.patch b/ci/conan/all/patches/7.0.0-0006-install-utils.patch new file mode 100644 index 0000000000000..7674174c8e254 --- /dev/null +++ b/ci/conan/all/patches/7.0.0-0006-install-utils.patch @@ -0,0 +1,39 @@ +MIT License + +Copyright (c) 2019 Conan.io + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt +index 495018e..f6cee6f 100644 +--- a/cpp/src/arrow/ipc/CMakeLists.txt ++++ b/cpp/src/arrow/ipc/CMakeLists.txt +@@ -61,8 +61,12 @@ endif() + if(ARROW_BUILD_UTILITIES OR ARROW_BUILD_INTEGRATION) + add_executable(arrow-file-to-stream file_to_stream.cc) + target_link_libraries(arrow-file-to-stream ${ARROW_UTIL_LIB}) ++ install(TARGETS arrow-file-to-stream ${INSTALL_IS_OPTIONAL} ++ DESTINATION ${CMAKE_INSTALL_BINDIR}) + add_executable(arrow-stream-to-file stream_to_file.cc) + target_link_libraries(arrow-stream-to-file ${ARROW_UTIL_LIB}) ++ install(TARGETS arrow-stream-to-file ${INSTALL_IS_OPTIONAL} ++ DESTINATION ${CMAKE_INSTALL_BINDIR}) + + if(ARROW_BUILD_INTEGRATION) + add_dependencies(arrow-integration arrow-file-to-stream) diff --git a/ci/conan/all/patches/8.0.0-0001-cmake.patch b/ci/conan/all/patches/8.0.0-0001-cmake.patch new file mode 100644 index 0000000000000..9e67f4a191208 --- /dev/null +++ b/ci/conan/all/patches/8.0.0-0001-cmake.patch @@ -0,0 +1,35 @@ +MIT License + +Copyright (c) 2019 Conan.io + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake +index ab7d2ed..6f1e411 100644 +--- a/cpp/cmake_modules/DefineOptions.cmake ++++ b/cpp/cmake_modules/DefineOptions.cmake +@@ -82,7 +82,7 @@ macro(define_option_string name description default) + endmacro() + + # Top level cmake dir +-if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") ++if(1) + #---------------------------------------------------------------------- + set_option_category("Compile and link") + diff --git a/ci/conan/all/patches/8.0.0-0002-jemalloc.patch b/ci/conan/all/patches/8.0.0-0002-jemalloc.patch new file mode 100644 index 0000000000000..99b92e3308f6f --- /dev/null +++ b/ci/conan/all/patches/8.0.0-0002-jemalloc.patch @@ -0,0 +1,48 @@ +MIT License + +Copyright (c) 2019 Conan.io + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt +index 690c51a..c518b7d 100644 +--- a/cpp/src/arrow/CMakeLists.txt ++++ b/cpp/src/arrow/CMakeLists.txt +@@ -326,7 +326,7 @@ set(ARROW_TESTING_SRCS + + set(_allocator_dependencies "") # Empty list + if(ARROW_JEMALLOC) +- list(APPEND _allocator_dependencies jemalloc_ep) ++ list(APPEND _allocator_dependencies jemalloc::jemalloc) + endif() + if(ARROW_MIMALLOC) + list(APPEND _allocator_dependencies mimalloc_ep) +diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc +index 2fab6f3..1f8f896 100644 +--- a/cpp/src/arrow/memory_pool.cc ++++ b/cpp/src/arrow/memory_pool.cc +@@ -52,7 +52,7 @@ + // Needed to support jemalloc 3 and 4 + #define JEMALLOC_MANGLE + // Explicitly link to our version of jemalloc +-#include "jemalloc_ep/dist/include/jemalloc/jemalloc.h" ++#include "jemalloc/jemalloc.h" + #endif + + #ifdef ARROW_MIMALLOC diff --git a/ci/conan/all/patches/8.0.0-0003-mallctl-takes-size_t.patch b/ci/conan/all/patches/8.0.0-0003-mallctl-takes-size_t.patch new file mode 100644 index 0000000000000..d9f21890416cd --- /dev/null +++ b/ci/conan/all/patches/8.0.0-0003-mallctl-takes-size_t.patch @@ -0,0 +1,35 @@ +MIT License + +Copyright (c) 2019 Conan.io + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc +index 1f8f896..37a89da 100644 +--- a/cpp/src/arrow/memory_pool.cc ++++ b/cpp/src/arrow/memory_pool.cc +@@ -767,7 +767,7 @@ MemoryPool* default_memory_pool() { + + Status jemalloc_set_decay_ms(int ms) { + #ifdef ARROW_JEMALLOC +- ssize_t decay_time_ms = static_cast(ms); ++ size_t decay_time_ms = static_cast(ms); + + int err = mallctl("arenas.dirty_decay_ms", nullptr, nullptr, &decay_time_ms, + sizeof(decay_time_ms)); diff --git a/ci/conan/all/patches/8.0.0-0004-use-find-package.patch b/ci/conan/all/patches/8.0.0-0004-use-find-package.patch new file mode 100644 index 0000000000000..e7bc2320c76d9 --- /dev/null +++ b/ci/conan/all/patches/8.0.0-0004-use-find-package.patch @@ -0,0 +1,401 @@ +MIT License + +Copyright (c) 2019 Conan.io + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt +index aba18c8..bb463d0 100644 +--- a/cpp/CMakeLists.txt ++++ b/cpp/CMakeLists.txt +@@ -721,7 +721,7 @@ if(ARROW_WITH_BZ2) + endif() + + if(ARROW_WITH_LZ4) +- list(APPEND ARROW_STATIC_LINK_LIBS LZ4::lz4) ++ list(APPEND ARROW_STATIC_LINK_LIBS lz4::lz4) + if(Lz4_SOURCE STREQUAL "SYSTEM") + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS LZ4::lz4) + endif() +@@ -907,8 +907,8 @@ endif() + if(ARROW_JEMALLOC) + add_definitions(-DARROW_JEMALLOC) + add_definitions(-DARROW_JEMALLOC_INCLUDE_DIR=${JEMALLOC_INCLUDE_DIR}) +- list(APPEND ARROW_LINK_LIBS jemalloc::jemalloc) +- list(APPEND ARROW_STATIC_LINK_LIBS jemalloc::jemalloc) ++ list(APPEND ARROW_LINK_LIBS jemalloc) ++ list(APPEND ARROW_STATIC_LINK_LIBS jemalloc) + endif() + + if(ARROW_MIMALLOC) +diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake +index f070323..2e2a03b 100644 +--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake ++++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake +@@ -974,6 +974,7 @@ else() + endif() + + if(ARROW_BOOST_REQUIRED) ++if(0) + resolve_dependency(Boost + HAVE_ALT + TRUE +@@ -982,6 +983,9 @@ if(ARROW_BOOST_REQUIRED) + IS_RUNTIME_DEPENDENCY + # libarrow.so doesn't depend on libboost*. 
+ FALSE) ++else() ++ find_package(Boost REQUIRED CONFIG) ++endif() + + if(TARGET Boost::system) + set(BOOST_SYSTEM_LIBRARY Boost::system) +@@ -1059,6 +1063,7 @@ macro(build_snappy) + endmacro() + + if(ARROW_WITH_SNAPPY) ++if(0) + resolve_dependency(Snappy PC_PACKAGE_NAMES snappy) + if(${Snappy_SOURCE} STREQUAL "SYSTEM" AND NOT snappy_PC_FOUND) + get_target_property(SNAPPY_LIB Snappy::snappy IMPORTED_LOCATION) +@@ -1067,6 +1072,9 @@ if(ARROW_WITH_SNAPPY) + # TODO: Don't use global includes but rather target_include_directories + get_target_property(SNAPPY_INCLUDE_DIRS Snappy::snappy INTERFACE_INCLUDE_DIRECTORIES) + include_directories(SYSTEM ${SNAPPY_INCLUDE_DIRS}) ++else() ++ find_package(Snappy REQUIRED) ++endif() + endif() + + # ---------------------------------------------------------------------- +@@ -1129,7 +1137,7 @@ macro(build_brotli) + endmacro() + + if(ARROW_WITH_BROTLI) +- resolve_dependency(Brotli PC_PACKAGE_NAMES libbrotlidec libbrotlienc) ++ find_package(Brotli REQUIRED) + # TODO: Don't use global includes but rather target_include_directories + get_target_property(BROTLI_INCLUDE_DIR Brotli::brotlicommon + INTERFACE_INCLUDE_DIRECTORIES) +@@ -1169,8 +1177,16 @@ if(PARQUET_REQUIRE_ENCRYPTION + set(BUILD_SHARED_LIBS_KEEP ${BUILD_SHARED_LIBS}) + set(BUILD_SHARED_LIBS ON) + +- find_package(OpenSSL ${ARROW_OPENSSL_REQUIRED_VERSION} REQUIRED) +- set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_KEEP}) ++ find_package(OpenSSL REQUIRED CONFIG) ++ message("OPENSSL_FOUND: ${OPENSSL_FOUND}") ++ message("OPENSSL_INCLUDE_DIR: ${OPENSSL_INCLUDE_DIR}") ++ message("OPENSSL_CRYPTO_LIBRARY: ${OPENSSL_CRYPTO_LIBRARY}") ++ message("OPENSSL_CRYPTO_LIBRARIES: ${OPENSSL_CRYPTO_LIBRARIES}") ++ message("OPENSSL_SSL_LIBRARY: ${OPENSSL_SSL_LIBRARY}") ++ message("OPENSSL_SSL_LIBRARIES: ${OPENSSL_SSL_LIBRARIES}") ++ message("OPENSSL_LIBRARIES: ${OPENSSL_LIBRARIES}") ++ message("OPENSSL_VERSION: ${OPENSSL_VERSION}") ++ set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_KEEP}) + unset(BUILD_SHARED_LIBS_KEEP) + else() + # Find static OpenSSL headers and libs +@@ -1249,10 +1265,14 @@ macro(build_glog) + endmacro() + + if(ARROW_USE_GLOG) ++if(0) + resolve_dependency(GLOG PC_PACKAGE_NAMES libglog) + # TODO: Don't use global includes but rather target_include_directories + get_target_property(GLOG_INCLUDE_DIR glog::glog INTERFACE_INCLUDE_DIRECTORIES) + include_directories(SYSTEM ${GLOG_INCLUDE_DIR}) ++else() ++ find_package(glog REQUIRED) ++endif() + endif() + + # ---------------------------------------------------------------------- +@@ -1321,6 +1341,7 @@ macro(build_gflags) + endmacro() + + if(ARROW_NEED_GFLAGS) ++if(0) + set(ARROW_GFLAGS_REQUIRED_VERSION "2.1.0") + resolve_dependency(gflags + HAVE_ALT +@@ -1339,6 +1360,10 @@ if(ARROW_NEED_GFLAGS) + set(GFLAGS_LIBRARIES gflags_shared) + endif() + endif() ++else() ++ find_package(gflags REQUIRED) ++ set(GFLAGS_LIBRARIES gflags::gflags) ++endif() + endif() + + # ---------------------------------------------------------------------- +@@ -1718,6 +1756,7 @@ if(ARROW_JEMALLOC) + # installations. 
+ # find_package(jemalloc) + ++if(0) + set(ARROW_JEMALLOC_USE_SHARED OFF) + set(JEMALLOC_PREFIX + "${CMAKE_CURRENT_BINARY_DIR}/jemalloc_ep-prefix/src/jemalloc_ep/dist/") +@@ -1778,6 +1817,9 @@ if(ARROW_JEMALLOC) + INTERFACE_INCLUDE_DIRECTORIES + "${CMAKE_CURRENT_BINARY_DIR}/jemalloc_ep-prefix/src") + add_dependencies(jemalloc::jemalloc jemalloc_ep) ++else() ++ find_package(jemalloc REQUIRED) ++endif() + + list(APPEND ARROW_BUNDLED_STATIC_LIBS jemalloc::jemalloc) + endif() +@@ -1786,6 +1828,7 @@ endif() + # mimalloc - Cross-platform high-performance allocator, from Microsoft + + if(ARROW_MIMALLOC) ++if(0) + message(STATUS "Building (vendored) mimalloc from source") + # We only use a vendored mimalloc as we want to control its build options. + +@@ -1834,6 +1877,10 @@ if(ARROW_MIMALLOC) + endif() + add_dependencies(mimalloc::mimalloc mimalloc_ep) + add_dependencies(toolchain mimalloc_ep) ++else() ++ find_package(mimalloc REQUIRED CONFIG) ++ add_dependencies(toolchain mimalloc::mimalloc) ++endif() + + list(APPEND ARROW_BUNDLED_STATIC_LIBS mimalloc::mimalloc) + endif() +@@ -2119,6 +2166,7 @@ macro(build_rapidjson) + endmacro() + + if(ARROW_WITH_RAPIDJSON) ++if(0) + set(ARROW_RAPIDJSON_REQUIRED_VERSION "1.1.0") + resolve_dependency(RapidJSON + HAVE_ALT +@@ -2131,6 +2179,10 @@ if(ARROW_WITH_RAPIDJSON) + if(RapidJSON_INCLUDE_DIR) + set(RAPIDJSON_INCLUDE_DIR "${RapidJSON_INCLUDE_DIR}") + endif() ++else() ++ find_package(RapidJSON REQUIRED) ++ set(RAPIDJSON_INCLUDE_DIR "${RapidJSON_INCLUDE_DIR}") ++endif() + + # TODO: Don't use global includes but rather target_include_directories + include_directories(SYSTEM ${RAPIDJSON_INCLUDE_DIR}) +@@ -2158,8 +2210,14 @@ endmacro() + + if((NOT ARROW_SIMD_LEVEL STREQUAL "NONE") OR (NOT ARROW_RUNTIME_SIMD_LEVEL STREQUAL "NONE" + )) ++if(0) + set(xsimd_SOURCE "BUNDLED") + resolve_dependency(xsimd) ++else() ++ find_package(xsimd) ++ set(XSIMD_INCLUDE_DIR "${xsimd_INCLUDE_DIR}") ++ add_dependencies(toolchain xsimd) ++endif() + # TODO: Don't use global includes but rather target_include_directories + include_directories(SYSTEM ${XSIMD_INCLUDE_DIR}) + endif() +@@ -2202,11 +2260,15 @@ macro(build_zlib) + endmacro() + + if(ARROW_WITH_ZLIB) ++if(0) + resolve_dependency(ZLIB PC_PACKAGE_NAMES zlib) + + # TODO: Don't use global includes but rather target_include_directories + get_target_property(ZLIB_INCLUDE_DIR ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES) + include_directories(SYSTEM ${ZLIB_INCLUDE_DIR}) ++else() ++ find_package(ZLIB REQUIRED) ++endif() + endif() + + macro(build_lz4) +@@ -2260,11 +2322,15 @@ macro(build_lz4) + endmacro() + + if(ARROW_WITH_LZ4) ++if(0) + resolve_dependency(Lz4 PC_PACKAGE_NAMES liblz4) + + # TODO: Don't use global includes but rather target_include_directories + get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) + include_directories(SYSTEM ${LZ4_INCLUDE_DIR}) ++else() ++ find_package(lz4 REQUIRED) ++endif() + endif() + + macro(build_zstd) +@@ -2325,6 +2391,7 @@ macro(build_zstd) + endmacro() + + if(ARROW_WITH_ZSTD) ++if(0) + # ARROW-13384: ZSTD_minCLevel was added in v1.4.0, required by ARROW-13091 + resolve_dependency(zstd + PC_PACKAGE_NAMES +@@ -2352,6 +2419,9 @@ if(ARROW_WITH_ZSTD) + get_target_property(ZSTD_INCLUDE_DIR ${ARROW_ZSTD_LIBZSTD} + INTERFACE_INCLUDE_DIRECTORIES) + include_directories(SYSTEM ${ZSTD_INCLUDE_DIR}) ++else() ++ find_package(zstd REQUIRED) ++endif() + endif() + + # ---------------------------------------------------------------------- +@@ -2391,6 +2461,7 @@ macro(build_re2) + endmacro() + + 
if(ARROW_WITH_RE2) ++if(0) + # Don't specify "PC_PACKAGE_NAMES re2" here because re2.pc may + # include -std=c++11. It's not compatible with C source and C++ + # source not uses C++ 11. +@@ -2411,6 +2482,9 @@ if(ARROW_WITH_RE2) + # TODO: Don't use global includes but rather target_include_directories + get_target_property(RE2_INCLUDE_DIR re2::re2 INTERFACE_INCLUDE_DIRECTORIES) + include_directories(SYSTEM ${RE2_INCLUDE_DIR}) ++else() ++ find_package(re2 REQUIRED) ++endif() + endif() + + macro(build_bzip2) +@@ -2462,6 +2536,7 @@ macro(build_bzip2) + endmacro() + + if(ARROW_WITH_BZ2) ++if(0) + resolve_dependency(BZip2) + if(${BZip2_SOURCE} STREQUAL "SYSTEM") + string(APPEND ARROW_PC_LIBS_PRIVATE " ${BZIP2_LIBRARIES}") +@@ -2474,6 +2549,9 @@ if(ARROW_WITH_BZ2) + INTERFACE_INCLUDE_DIRECTORIES "${BZIP2_INCLUDE_DIR}") + endif() + include_directories(SYSTEM "${BZIP2_INCLUDE_DIR}") ++else() ++ find_package(BZip2 REQUIRED) ++endif() + endif() + + macro(build_utf8proc) +@@ -2517,6 +2595,7 @@ macro(build_utf8proc) + endmacro() + + if(ARROW_WITH_UTF8PROC) ++if(0) + resolve_dependency(utf8proc + REQUIRED_VERSION + "2.2.0" +@@ -2538,6 +2617,10 @@ if(ARROW_WITH_UTF8PROC) + get_target_property(UTF8PROC_INCLUDE_DIR utf8proc::utf8proc + INTERFACE_INCLUDE_DIRECTORIES) + include_directories(SYSTEM ${UTF8PROC_INCLUDE_DIR}) ++else() ++ find_package(utf8proc REQUIRED CONFIG) ++ add_definitions(-DARROW_WITH_UTF8PROC) ++endif() + endif() + + macro(build_cares) +@@ -3702,6 +3785,7 @@ macro(build_grpc) + endmacro() + + if(ARROW_WITH_GRPC) ++if(0) + set(ARROW_GRPC_REQUIRED_VERSION "1.17.0") + if(NOT Protobuf_SOURCE STREQUAL gRPC_SOURCE) + # ARROW-15495: Protobuf/gRPC must come from the same source +@@ -3735,6 +3819,9 @@ if(ARROW_WITH_GRPC) + message(FATAL_ERROR "Cannot find grpc++ headers in ${GRPC_INCLUDE_DIR}") + endif() + endif() ++else() ++ find_package(gRPC REQUIRED CONFIG) ++endif() + endif() + + # ---------------------------------------------------------------------- +@@ -3937,10 +4024,14 @@ macro(build_google_cloud_cpp_storage) + endmacro() + + if(ARROW_WITH_GOOGLE_CLOUD_CPP) ++if(0) + resolve_dependency(google_cloud_cpp_storage) + get_target_property(google_cloud_cpp_storage_INCLUDE_DIR google-cloud-cpp::storage + INTERFACE_INCLUDE_DIRECTORIES) + include_directories(SYSTEM ${google_cloud_cpp_storage_INCLUDE_DIR}) ++else() ++ find_package(google-cloud-cpp REQUIRED) ++endif() + get_target_property(absl_base_INCLUDE_DIR absl::base INTERFACE_INCLUDE_DIRECTORIES) + include_directories(SYSTEM ${absl_base_INCLUDE_DIR}) + message(STATUS "Found google-cloud-cpp::storage headers: ${google_cloud_cpp_storage_INCLUDE_DIR}" +@@ -4261,6 +4352,7 @@ macro(build_opentelemetry) + endmacro() + + if(ARROW_WITH_OPENTELEMETRY) ++if(0) + # cURL is required whether we build from source or use an existing installation + # (OTel's cmake files do not call find_curl for you) + find_curl() +@@ -4269,7 +4361,10 @@ if(ARROW_WITH_OPENTELEMETRY) + get_target_property(OPENTELEMETRY_INCLUDE_DIR opentelemetry-cpp::api + INTERFACE_INCLUDE_DIRECTORIES) + include_directories(SYSTEM ${OPENTELEMETRY_INCLUDE_DIR}) +- message(STATUS "Found OpenTelemetry headers: ${OPENTELEMETRY_INCLUDE_DIR}") ++else() ++ find_package(opentelemetry-cpp REQUIRED) ++endif() ++ message(STATUS "Found OpenTelemetry headers: ${OPENTELEMETRY_INCLUDE_DIR}") + endif() + + # ---------------------------------------------------------------------- +diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt +index c518b7d..40b4853 100644 +--- 
a/cpp/src/arrow/CMakeLists.txt ++++ b/cpp/src/arrow/CMakeLists.txt +@@ -584,6 +584,10 @@ foreach(LIB_TARGET ${ARROW_LIBRARIES}) + target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_EXPORTING) + endforeach() + ++if(ARROW_BUILD_SHARED AND WIN32) ++ target_compile_definitions(arrow_shared PRIVATE ARROW_EXPORTING) ++endif() ++ + if(ARROW_WITH_BACKTRACE) + find_package(Backtrace) + +@@ -594,7 +598,7 @@ if(ARROW_WITH_BACKTRACE) + endforeach() + endif() + +-if(ARROW_BUILD_BUNDLED_DEPENDENCIES) ++if(0) + arrow_car(_FIRST_LIB ${ARROW_BUNDLED_STATIC_LIBS}) + arrow_cdr(_OTHER_LIBS ${ARROW_BUNDLED_STATIC_LIBS}) + create_merged_static_lib(arrow_bundled_dependencies diff --git a/ci/conan/all/patches/8.0.0-0005-install-utils.patch b/ci/conan/all/patches/8.0.0-0005-install-utils.patch new file mode 100644 index 0000000000000..98075913ed109 --- /dev/null +++ b/ci/conan/all/patches/8.0.0-0005-install-utils.patch @@ -0,0 +1,65 @@ +MIT License + +Copyright (c) 2019 Conan.io + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt +index aba18c8..bb463d0 100644 +--- a/cpp/CMakeLists.txt ++++ b/cpp/CMakeLists.txt +@@ -721,7 +721,7 @@ if(ARROW_WITH_BZ2) + endif() + + if(ARROW_WITH_LZ4) +- list(APPEND ARROW_STATIC_LINK_LIBS LZ4::lz4) ++ list(APPEND ARROW_STATIC_LINK_LIBS lz4::lz4) + if(Lz4_SOURCE STREQUAL "SYSTEM") + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS LZ4::lz4) + endif() +@@ -907,8 +907,8 @@ endif() + if(ARROW_JEMALLOC) + add_definitions(-DARROW_JEMALLOC) + add_definitions(-DARROW_JEMALLOC_INCLUDE_DIR=${JEMALLOC_INCLUDE_DIR}) +- list(APPEND ARROW_LINK_LIBS jemalloc::jemalloc) +- list(APPEND ARROW_STATIC_LINK_LIBS jemalloc::jemalloc) ++ list(APPEND ARROW_LINK_LIBS jemalloc) ++ list(APPEND ARROW_STATIC_LINK_LIBS jemalloc) + endif() + + if(ARROW_MIMALLOC) +diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt +index 495018e..3dcb35d 100644 +--- a/cpp/src/arrow/ipc/CMakeLists.txt ++++ b/cpp/src/arrow/ipc/CMakeLists.txt +@@ -61,9 +61,13 @@ endif() + if(ARROW_BUILD_UTILITIES OR ARROW_BUILD_INTEGRATION) + add_executable(arrow-file-to-stream file_to_stream.cc) + target_link_libraries(arrow-file-to-stream ${ARROW_UTIL_LIB}) ++ install(TARGETS arrow-file-to-stream ${INSTALL_IS_OPTIONAL} ++ DESTINATION ${CMAKE_INSTALL_BINDIR}) + add_executable(arrow-stream-to-file stream_to_file.cc) + target_link_libraries(arrow-stream-to-file ${ARROW_UTIL_LIB}) +- ++ install(TARGETS arrow-stream-to-file ${INSTALL_IS_OPTIONAL} ++ DESTINATION ${CMAKE_INSTALL_BINDIR}) ++ + if(ARROW_BUILD_INTEGRATION) + add_dependencies(arrow-integration arrow-file-to-stream) + add_dependencies(arrow-integration arrow-stream-to-file) diff --git a/ci/conan/all/test_package/CMakeLists.txt b/ci/conan/all/test_package/CMakeLists.txt index 508cb44890427..0df89423c14ef 100644 --- a/ci/conan/all/test_package/CMakeLists.txt +++ b/ci/conan/all/test_package/CMakeLists.txt @@ -20,15 +20,15 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.8) project(test_package) include(${CMAKE_BINARY_DIR}/conanbuildinfo.cmake) conan_basic_setup() -find_package(Arrow) +find_package(Arrow REQUIRED) add_executable(${PROJECT_NAME} test_package.cpp) target_link_libraries(${PROJECT_NAME} arrow::arrow) -set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_STANDARD 11) +target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_11) target_compile_definitions(${PROJECT_NAME} PRIVATE WITH_JEMALLOC) diff --git a/ci/conan/config.yml b/ci/conan/config.yml index 75c894c63f602..b38fe36930ca3 100644 --- a/ci/conan/config.yml +++ b/ci/conan/config.yml @@ -21,6 +21,10 @@ # SOFTWARE. versions: + "8.0.1": + folder: all + "8.0.0": + folder: all "7.0.0": folder: all "2.0.0": diff --git a/ci/conan/merge_status.sh b/ci/conan/merge_status.sh index daed3b81eb685..0e047ab753a7c 100644 --- a/ci/conan/merge_status.sh +++ b/ci/conan/merge_status.sh @@ -15,4 +15,4 @@ # specific language governing permissions and limitations # under the License. 
-UPSTREAM_REVISION=232a32d832f9754b81dde348e8fd8ded37ad404b +UPSTREAM_REVISION=d659daa941be8ea8d2b22e8802c61f15688c67d5 diff --git a/ci/conan/merge_upstream.sh b/ci/conan/merge_upstream.sh index 3d2c3334250fc..76af58f70df5e 100755 --- a/ci/conan/merge_upstream.sh +++ b/ci/conan/merge_upstream.sh @@ -37,7 +37,7 @@ git \ diff \ ${UPSTREAM_REVISION}..${UPSTREAM_HEAD} \ recipes/arrow | \ - (cd "${source_dir}" && patch -p3) + (cd "${source_dir}" && patch -p3 || :) sed \ -i.bak \ diff --git a/ci/conda_env_gandiva_win.txt b/ci/conda_env_gandiva_win.txt index 9098b53d1f53c..621b6f5a99d0d 100644 --- a/ci/conda_env_gandiva_win.txt +++ b/ci/conda_env_gandiva_win.txt @@ -15,6 +15,5 @@ # specific language governing permissions and limitations # under the License. -# llvmdev=9 or later require Visual Studio 2017 -clangdev=8 -llvmdev=8 +clangdev +llvmdev diff --git a/ci/docker/alpine-linux-3.16-cpp.dockerfile b/ci/docker/alpine-linux-3.16-cpp.dockerfile new file mode 100644 index 0000000000000..1f4ed4c47be93 --- /dev/null +++ b/ci/docker/alpine-linux-3.16-cpp.dockerfile @@ -0,0 +1,102 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG arch=amd64 +FROM ${arch}/alpine:3.16 + +RUN apk add \ + bash \ + benchmark-dev \ + boost-dev \ + brotli-dev \ + bzip2-dev \ + c-ares-dev \ + ccache \ + clang \ + cmake \ + curl-dev \ + g++ \ + gcc \ + gdb \ + gflags-dev \ + git \ + glog-dev \ + gmock \ + grpc-dev \ + gtest-dev \ + libxml2-dev \ + llvm13-dev \ + llvm13-static \ + lz4-dev \ + make \ + musl-locales \ + nlohmann-json \ + openssl-dev \ + perl \ + pkgconfig \ + protobuf-dev \ + py3-pip \ + py3-numpy-dev \ + python3-dev \ + rapidjson-dev \ + re2-dev \ + rsync \ + samurai \ + snappy-dev \ + sqlite-dev \ + thrift-dev \ + tzdata \ + utf8proc-dev \ + zlib-dev \ + zstd-dev && \ + rm -rf /var/cache/apk/* && \ + ln -s /usr/share/zoneinfo/Etc/UTC /etc/localtime && \ + echo "Etc/UTC" > /etc/timezone + +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_minio.sh latest /usr/local + +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_gcs_testbench.sh default + +ENV ARROW_BUILD_TESTS=ON \ + ARROW_DATASET=ON \ + ARROW_DEPENDENCY_SOURCE=SYSTEM \ + ARROW_FLIGHT=ON \ + ARROW_FLIGHT_SQL=ON \ + ARROW_GANDIVA=ON \ + ARROW_GCS=ON \ + ARROW_HOME=/usr/local \ + ARROW_ORC=ON \ + ARROW_PARQUET=ON \ + ARROW_PLASMA=ON \ + ARROW_S3=ON \ + ARROW_USE_CCACHE=ON \ + ARROW_WITH_BROTLI=ON \ + ARROW_WITH_BZ2=ON \ + ARROW_WITH_LZ4=ON \ + ARROW_WITH_OPENTELEMETRY=OFF \ + ARROW_WITH_MUSL=ON \ + ARROW_WITH_SNAPPY=ON \ + ARROW_WITH_ZLIB=ON \ + ARROW_WITH_ZSTD=ON \ + AWSSDK_SOURCE=BUNDLED \ + CMAKE_CXX_STANDARD=17 \ + google_cloud_cpp_storage_SOURCE=BUNDLED \ + ORC_SOURCE=BUNDLED \ + PATH=/usr/lib/ccache/:$PATH \ + xsimd_SOURCE=BUNDLED diff --git a/ci/docker/conda-integration.dockerfile b/ci/docker/conda-integration.dockerfile index 8bcf5954d1db7..cce6a287f86be 100644 --- a/ci/docker/conda-integration.dockerfile +++ b/ci/docker/conda-integration.dockerfile @@ -27,6 +27,8 @@ ARG go=1.15 # Install Archery and integration dependencies COPY ci/conda_env_archery.txt /arrow/ci/ + +# Pin zlib to 1.2.11 due to ARROW-17410 until the patch is released RUN mamba install -q -y \ --file arrow/ci/conda_env_archery.txt \ "python>=3.7" \ @@ -35,7 +37,8 @@ RUN mamba install -q -y \ maven=${maven} \ nodejs=${node} \ yarn \ - openjdk=${jdk} && \ + openjdk=${jdk} \ + zlib=1.2.11 && \ mamba clean --all --force-pkgs-dirs # Install Rust with only the needed components diff --git a/ci/docker/debian-10-cpp.dockerfile b/ci/docker/debian-10-cpp.dockerfile index aae789164be26..a0872928c57fa 100644 --- a/ci/docker/debian-10-cpp.dockerfile +++ b/ci/docker/debian-10-cpp.dockerfile @@ -76,7 +76,8 @@ RUN apt-get update -y -q && \ COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh latest /usr/local -ENV ARROW_BUILD_TESTS=ON \ +ENV absl_SOURCE=BUNDLED \ + ARROW_BUILD_TESTS=ON \ ARROW_DATASET=ON \ ARROW_DEPENDENCY_SOURCE=SYSTEM \ ARROW_FLIGHT=ON \ diff --git a/ci/docker/debian-10-go.dockerfile b/ci/docker/debian-10-go.dockerfile index f0c0522081d1c..dfe81f5a73ced 100644 --- a/ci/docker/debian-10-go.dockerfile +++ b/ci/docker/debian-10-go.dockerfile @@ -16,12 +16,15 @@ # under the License. ARG arch=amd64 -ARG go=1.15 +ARG go=1.16 +ARG staticcheck=v0.2.2 FROM ${arch}/golang:${go}-buster -RUN GO111MODULE=on go install honnef.co/go/tools/cmd/staticcheck@v0.2.2 +# FROM collects all the args, get back the staticcheck version arg +ARG staticcheck -# TODO(kszucs): -# 1. add the files required to install the dependencies to .dockerignore -# 2. copy these files to their appropriate path -# 3. 
download and compile the dependencies +RUN GO111MODULE=on go install honnef.co/go/tools/cmd/staticcheck@${staticcheck} + +# Copy the go.mod and go.sum over and pre-download all the dependencies +COPY go/ /arrow/go +RUN cd /arrow/go && go mod download diff --git a/ci/docker/debian-11-cpp.dockerfile b/ci/docker/debian-11-cpp.dockerfile index 790a22dcd8072..a403df2368f7e 100644 --- a/ci/docker/debian-11-cpp.dockerfile +++ b/ci/docker/debian-11-cpp.dockerfile @@ -78,7 +78,8 @@ RUN /arrow/ci/scripts/install_minio.sh latest /usr/local COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default -ENV ARROW_BUILD_TESTS=ON \ +ENV absl_SOURCE=BUNDLED \ + ARROW_BUILD_TESTS=ON \ ARROW_DATASET=ON \ ARROW_DEPENDENCY_SOURCE=SYSTEM \ ARROW_FLIGHT=ON \ diff --git a/ci/docker/debian-11-go.dockerfile b/ci/docker/debian-11-go.dockerfile index 33f523e36aa00..32d7b3af39018 100644 --- a/ci/docker/debian-11-go.dockerfile +++ b/ci/docker/debian-11-go.dockerfile @@ -17,11 +17,13 @@ ARG arch=amd64 ARG go=1.16 +ARG staticcheck=v0.2.2 FROM ${arch}/golang:${go}-bullseye -RUN GO111MODULE=on go install honnef.co/go/tools/cmd/staticcheck@v0.2.2 +# FROM collects all the args, get back the staticcheck version arg +ARG staticcheck +RUN GO111MODULE=on go install honnef.co/go/tools/cmd/staticcheck@${staticcheck} -# TODO(kszucs): -# 1. add the files required to install the dependencies to .dockerignore -# 2. copy these files to their appropriate path -# 3. download and compile the dependencies +# Copy the go.mod and go.sum over and pre-download all the dependencies +COPY go/ /arrow/go +RUN cd /arrow/go && go mod download diff --git a/ci/docker/fedora-35-cpp.dockerfile b/ci/docker/fedora-35-cpp.dockerfile index cbe9ee7539ac2..ce9c8857c85c3 100644 --- a/ci/docker/fedora-35-cpp.dockerfile +++ b/ci/docker/fedora-35-cpp.dockerfile @@ -71,7 +71,8 @@ RUN /arrow/ci/scripts/install_minio.sh latest /usr/local COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default -ENV ARROW_BUILD_TESTS=ON \ +ENV absl_SOURCE=BUNDLED \ + ARROW_BUILD_TESTS=ON \ ARROW_DEPENDENCY_SOURCE=SYSTEM \ ARROW_DATASET=ON \ ARROW_FLIGHT=ON \ diff --git a/ci/docker/java-jni-manylinux-201x.dockerfile b/ci/docker/java-jni-manylinux-201x.dockerfile index 52bdb9b923dec..de953fd5ae057 100644 --- a/ci/docker/java-jni-manylinux-201x.dockerfile +++ b/ci/docker/java-jni-manylinux-201x.dockerfile @@ -33,5 +33,13 @@ RUN vcpkg install \ # Install Java ARG java=1.8.0 -RUN yum install -y java-$java-openjdk-devel && yum clean all +RUN yum install -y java-$java-openjdk-devel rh-maven35 && yum clean all ENV JAVA_HOME=/usr/lib/jvm/java-$java-openjdk/ + +# For ci/scripts/java_*.sh +ENV ARROW_GANDIVA_JAVA=ON \ + ARROW_HOME=/tmp/local \ + ARROW_JAVA_CDATA=ON \ + ARROW_JNI=ON \ + ARROW_PLASMA=ON \ + ARROW_USE_CCACHE=ON diff --git a/ci/docker/linux-apt-python-3.dockerfile b/ci/docker/linux-apt-python-3.dockerfile index ece7cf09129cd..78f6a12c1c276 100644 --- a/ci/docker/linux-apt-python-3.dockerfile +++ b/ci/docker/linux-apt-python-3.dockerfile @@ -39,6 +39,12 @@ RUN pip install \ -r arrow/python/requirements-build.txt \ -r arrow/python/requirements-test.txt +ARG numba +COPY ci/scripts/install_numba.sh /arrow/ci/scripts/ +RUN if [ "${numba}" != "" ]; then \ + /arrow/ci/scripts/install_numba.sh ${numba} \ + ; fi + ENV ARROW_PYTHON=ON \ ARROW_BUILD_STATIC=OFF \ ARROW_BUILD_TESTS=OFF \ diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index 
24d5f8e5da4ad..dd36aff84c533 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -119,13 +119,15 @@ RUN /arrow/ci/scripts/install_ceph.sh # Prioritize system packages and local installation # The following dependencies will be downloaded due to missing/invalid packages # provided by the distribution: +# - Abseil is not packaged # - libc-ares-dev does not install CMake config files # - flatbuffer is not packaged # - libgtest-dev only provide sources # - libprotobuf-dev only provide sources # ARROW-17051: this build uses static Protobuf, so we must also use # static Arrow to run Flight/Flight SQL tests -ENV ARROW_BUILD_STATIC=ON \ +ENV absl_SOURCE=BUNDLED \ + ARROW_BUILD_STATIC=ON \ ARROW_BUILD_TESTS=ON \ ARROW_DEPENDENCY_SOURCE=SYSTEM \ ARROW_DATASET=ON \ diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile index c2019df153ac3..05aca53151bea 100644 --- a/ci/docker/ubuntu-22.04-cpp.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp.dockerfile @@ -146,13 +146,15 @@ RUN /arrow/ci/scripts/install_gcs_testbench.sh default # Prioritize system packages and local installation # The following dependencies will be downloaded due to missing/invalid packages # provided by the distribution: +# - Abseil is old # - libc-ares-dev does not install CMake config files # - flatbuffer is not packaged # - libgtest-dev only provide sources # - libprotobuf-dev only provide sources # ARROW-17051: this build uses static Protobuf, so we must also use # static Arrow to run Flight/Flight SQL tests -ENV ARROW_BUILD_STATIC=ON \ +ENV absl_SOURCE=BUNDLED \ + ARROW_BUILD_STATIC=ON \ ARROW_BUILD_TESTS=ON \ ARROW_DEPENDENCY_SOURCE=SYSTEM \ ARROW_DATASET=ON \ diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD index 428447d263843..f0a09bab7f0e4 100644 --- a/ci/scripts/PKGBUILD +++ b/ci/scripts/PKGBUILD @@ -18,7 +18,7 @@ _realname=arrow pkgbase=mingw-w64-${_realname} pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}" -pkgver=8.0.0.9000 +pkgver=9.0.0.9000 pkgrel=8000 pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)" arch=("any") diff --git a/ci/scripts/conan_build.sh b/ci/scripts/conan_build.sh index 808e45e11fdce..3bdd6a0bd5f72 100755 --- a/ci/scripts/conan_build.sh +++ b/ci/scripts/conan_build.sh @@ -34,18 +34,30 @@ fi if [ -n "${ARROW_CONAN_WITH_BROTLI:-}" ]; then conan_args+=(--options arrow:with_brotli=${ARROW_CONAN_WITH_BROTLI}) fi +if [ -n "${ARROW_CONAN_WITH_BZ2:-}" ]; then + conan_args+=(--options arrow:with_bz2=${ARROW_CONAN_WITH_BZ2}) +fi +if [ -n "${ARROW_CONAN_WITH_FLIGHT_RPC:-}" ]; then + conan_args+=(--options arrow:with_flight_rpc=${ARROW_CONAN_WITH_FLIGHT_RPC}) +fi if [ -n "${ARROW_CONAN_WITH_GLOG:-}" ]; then conan_args+=(--options arrow:with_glog=${ARROW_CONAN_WITH_GLOG}) fi if [ -n "${ARROW_CONAN_WITH_JEMALLOC:-}" ]; then conan_args+=(--options arrow:with_jemalloc=${ARROW_CONAN_WITH_JEMALLOC}) fi +if [ -n "${ARROW_CONAN_WITH_JSON:-}" ]; then + conan_args+=(--options arrow:with_json=${ARROW_CONAN_WITH_JSON}) +fi if [ -n "${ARROW_CONAN_WITH_LZ4:-}" ]; then conan_args+=(--options arrow:with_lz4=${ARROW_CONAN_WITH_LZ4}) fi if [ -n "${ARROW_CONAN_WITH_SNAPPY:-}" ]; then conan_args+=(--options arrow:with_snappy=${ARROW_CONAN_WITH_SNAPPY}) fi +if [ -n "${ARROW_CONAN_WITH_ZSTD:-}" ]; then + conan_args+=(--options arrow:with_zstd=${ARROW_CONAN_WITH_ZSTD}) +fi version=$(grep '^set(ARROW_VERSION ' ${ARROW_HOME}/cpp/CMakeLists.txt | \ grep -E -o '([0-9.]*)') diff --git a/ci/scripts/cpp_build.sh 
b/ci/scripts/cpp_build.sh index bac8acaadee21..c7d200697820a 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -69,6 +69,7 @@ mkdir -p ${build_dir} pushd ${build_dir} cmake \ + -Dabsl_SOURCE=${absl_SOURCE:-} \ -DARROW_BOOST_USE_SHARED=${ARROW_BOOST_USE_SHARED:-ON} \ -DARROW_BUILD_BENCHMARKS_REFERENCE=${ARROW_BUILD_BENCHMARKS:-OFF} \ -DARROW_BUILD_BENCHMARKS=${ARROW_BUILD_BENCHMARKS:-OFF} \ @@ -126,6 +127,7 @@ cmake \ -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-OFF} \ -DARROW_WITH_LZ4=${ARROW_WITH_LZ4:-OFF} \ -DARROW_WITH_OPENTELEMETRY=${ARROW_WITH_OPENTELEMETRY:-OFF} \ + -DARROW_WITH_MUSL=${ARROW_WITH_MUSL:-OFF} \ -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY:-OFF} \ -DARROW_WITH_UTF8PROC=${ARROW_WITH_UTF8PROC:-ON} \ -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB:-OFF} \ @@ -171,7 +173,7 @@ time cmake --build . --target install popd if [ -x "$(command -v ldconfig)" ]; then - ldconfig + ldconfig ${ARROW_HOME}/${CMAKE_INSTALL_LIBDIR:-lib} fi if [ "${ARROW_USE_CCACHE}" == "ON" ]; then diff --git a/ci/scripts/go_build.sh b/ci/scripts/go_build.sh index 20879cc0e704e..43f348b153814 100755 --- a/ci/scripts/go_build.sh +++ b/ci/scripts/go_build.sh @@ -36,17 +36,15 @@ if [[ -n "${ARROW_GO_TESTCGO}" ]]; then go clean -cache go clean -testcache fi - TAGS="-tags assert,test,ccalloc" + TAGS="-tags assert,test,ccalloc" fi -go get -d -t -v ./... go install $TAGS -v ./... popd pushd ${source_dir}/parquet -go get -d -t -v ./... go install -v ./... popd diff --git a/ci/scripts/go_test.sh b/ci/scripts/go_test.sh index 9923cff6330f5..0c07e0fc6bf3e 100755 --- a/ci/scripts/go_test.sh +++ b/ci/scripts/go_test.sh @@ -19,18 +19,25 @@ set -ex +ver=`go env GOVERSION` + source_dir=${1}/go testargs="-race" +if [[ "${ver#go}" =~ ^1\.1[8-9] ]] && [ "$(go env GOOS)" != "darwin" ]; then + # asan not supported on darwin/amd64 + testargs="-asan" +fi + case "$(uname)" in MINGW*) - # -race doesn't work on windows currently + # -asan and -race don't work on windows currently testargs="" ;; esac if [[ "$(go env GOHOSTARCH)" = "s390x" ]]; then - testargs="" # -race not supported on s390x + testargs="" # -race and -asan not supported on s390x fi # Go static check (skipped in MinGW) @@ -44,9 +51,9 @@ fi pushd ${source_dir}/arrow TAGS="assert,test" -if [[ -n "${ARROW_GO_TESTCGO}" ]]; then +if [[ -n "${ARROW_GO_TESTCGO}" ]]; then if [[ "${MSYSTEM}" = "MINGW64" ]]; then - export PATH=${MINGW_PREFIX}/bin:$PATH + export PATH=${MINGW_PREFIX}/bin:$PATH fi TAGS="${TAGS},ccalloc" fi @@ -56,9 +63,7 @@ fi # tag in order to run its tests so that the testing functions implemented # in .c files don't get included in non-test builds. -for d in $(go list ./... | grep -v vendor); do - go test $testargs -tags $TAGS $d -done +go test $testargs -tags $TAGS ./... popd @@ -66,8 +71,6 @@ export PARQUET_TEST_DATA=${1}/cpp/submodules/parquet-testing/data pushd ${source_dir}/parquet -for d in $(go list ./... | grep -v vendor); do - go test $testargs -tags assert $d -done +go test $testargs -tags assert ./... 
popd diff --git a/ci/scripts/install_ccache.sh b/ci/scripts/install_ccache.sh index 8c64fe56c41cb..7d39e18ebe514 100755 --- a/ci/scripts/install_ccache.sh +++ b/ci/scripts/install_ccache.sh @@ -26,20 +26,33 @@ fi version=$1 prefix=$2 -url="https://github.com/ccache/ccache/archive/v${version}.tar.gz" -mkdir /tmp/ccache -wget -q ${url} -O - | tar -xzf - --directory /tmp/ccache --strip-components=1 +mkdir -p /tmp/ccache +case $(uname) in + MINGW64*) + url="https://github.com/ccache/ccache/releases/download/v${version}/ccache-${version}-windows-x86_64.zip" + pushd /tmp/ccache + curl --fail --location --remote-name ${url} + unzip -j ccache-${version}-windows-x86_64.zip + chmod +x ccache.exe + mv ccache.exe ${prefix}/bin/ + popd + ;; + *) + url="https://github.com/ccache/ccache/archive/v${version}.tar.gz" -mkdir /tmp/ccache/build -pushd /tmp/ccache/build -cmake \ - -GNinja \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_INSTALL_PREFIX=${prefix} \ - -DZSTD_FROM_INTERNET=ON \ - .. -ninja install -popd + wget -q ${url} -O - | tar -xzf - --directory /tmp/ccache --strip-components=1 + mkdir /tmp/ccache/build + pushd /tmp/ccache/build + cmake \ + -GNinja \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=${prefix} \ + -DZSTD_FROM_INTERNET=ON \ + .. + ninja install + popd + ;; +esac rm -rf /tmp/ccache diff --git a/ci/scripts/install_gcs_testbench.sh b/ci/scripts/install_gcs_testbench.sh index f7fa6e611d4d8..0e56310662669 100755 --- a/ci/scripts/install_gcs_testbench.sh +++ b/ci/scripts/install_gcs_testbench.sh @@ -38,20 +38,20 @@ case "$(uname -s)-$(uname -m)" in Darwin-arm64) # Workaround for https://github.com/grpc/grpc/issues/28387 . # Build grpcio instead of using wheel. - # storage-testbench 0.16.0 pins grpcio to 1.44.0. - ${PYTHON:-python3} -m pip install --no-binary :all: "grpcio==1.44.0" + # storage-testbench 0.27.0 pins grpcio to 1.46.1. + ${PYTHON:-python3} -m pip install --no-binary :all: "grpcio==1.46.1" ;; *_NT-*) # Mingw-w64: MSYS_NT-10.0-19043, MINGW32_NT-10.0-19043, MINGW64_NT-10.0-19043 # Don't use the "/MT" option because g++ doesn't recognize it. # "/MT" is for Visual Studio. - GRPC_PYTHON_CFLAGS=" " ${PYTHON:-python3} -m pip install "grpcio==1.44.0" + GRPC_PYTHON_CFLAGS=" " ${PYTHON:-python3} -m pip install "grpcio==1.46.1" ;; esac version=$1 if [[ "${version}" -eq "default" ]]; then - version="v0.16.0" + version="v0.27.0" fi ${PYTHON:-python3} -m pip install \ diff --git a/ci/scripts/install_numba.sh b/ci/scripts/install_numba.sh new file mode 100755 index 0000000000000..470f291ba80a9 --- /dev/null +++ b/ci/scripts/install_numba.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -e + +if [ "$#" -ne 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +numba=$1 + +if [ "${numba}" = "master" ]; then + pip install https://github.com/numba/numba/archive/main.tar.gz#egg=numba +elif [ "${numba}" = "latest" ]; then + pip install numba +else + pip install numba==${numba} +fi diff --git a/ci/scripts/java_build.sh b/ci/scripts/java_build.sh index b0362868b0a4e..ac252f55b37b5 100755 --- a/ci/scripts/java_build.sh +++ b/ci/scripts/java_build.sh @@ -18,10 +18,13 @@ set -ex +if [[ "${ARROW_JAVA_BUILD:-ON}" != "ON" ]]; then + exit +fi + arrow_dir=${1} source_dir=${1}/java build_dir=${2} -cpp_build_dir=${build_dir}/cpp/${ARROW_BUILD_TYPE:-debug} java_jni_dist_dir=${3} : ${BUILD_DOCS_JAVA:=OFF} @@ -64,6 +67,11 @@ if [[ "$(uname -s)" == "Linux" ]] && [[ "$(uname -m)" == "s390x" ]]; then fi mvn="mvn -B -DskipTests -Drat.skip=true -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn" + +if [ $ARROW_JAVA_SKIP_GIT_PLUGIN ]; then + mvn="${mvn} -Dmaven.gitcommitid.skip=true" +fi + # Use `2 * ncores` threads mvn="${mvn} -T 2C" @@ -80,7 +88,7 @@ if [ "${ARROW_JAVA_CDATA}" = "ON" ]; then fi if [ "${ARROW_GANDIVA_JAVA}" = "ON" ]; then - ${mvn} -Darrow.cpp.build.dir=${cpp_build_dir} -Parrow-jni install + ${mvn} -Darrow.cpp.build.dir=${java_jni_dist_dir} -Parrow-jni install fi if [ "${ARROW_PLASMA}" = "ON" ]; then diff --git a/ci/scripts/java_jni_macos_build.sh b/ci/scripts/java_jni_macos_build.sh index 590c469e398ff..5418daaf0113b 100755 --- a/ci/scripts/java_jni_macos_build.sh +++ b/ci/scripts/java_jni_macos_build.sh @@ -29,6 +29,7 @@ echo "=== Clear output directories and leftovers ===" rm -rf ${build_dir} echo "=== Building Arrow C++ libraries ===" +install_dir=${build_dir}/cpp-install : ${ARROW_BUILD_TESTS:=OFF} : ${ARROW_DATASET:=ON} : ${ARROW_FILESYSTEM:=ON} @@ -40,9 +41,15 @@ echo "=== Building Arrow C++ libraries ===" : ${ARROW_PLASMA:=ON} : ${ARROW_PYTHON:=OFF} : ${ARROW_S3:=ON} +: ${ARROW_USE_CCACHE:=OFF} : ${CMAKE_BUILD_TYPE:=Release} : ${CMAKE_UNITY_BUILD:=ON} +if [ "${ARROW_USE_CCACHE}" == "ON" ]; then + echo "=== ccache statistics before build ===" + ccache -s +fi + export ARROW_TEST_DATA="${arrow_dir}/testing/data" export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data" export AWS_EC2_METADATA_DISABLED=TRUE @@ -75,12 +82,13 @@ cmake \ -DARROW_S3=${ARROW_S3} \ -DARROW_SNAPPY_USE_SHARED=OFF \ -DARROW_THRIFT_USE_SHARED=OFF \ + -DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \ -DARROW_UTF8PROC_USE_SHARED=OFF \ -DARROW_ZSTD_USE_SHARED=OFF \ -DAWSSDK_SOURCE=BUNDLED \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -DCMAKE_INSTALL_LIBDIR=lib \ - -DCMAKE_INSTALL_PREFIX=${build_dir}/cpp \ + -DCMAKE_INSTALL_PREFIX=${install_dir} \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \ -DPARQUET_BUILD_EXAMPLES=OFF \ -DPARQUET_BUILD_EXECUTABLES=OFF \ @@ -102,12 +110,17 @@ ${arrow_dir}/ci/scripts/java_jni_build.sh \ ${build_dir} \ ${dist_dir} +if [ "${ARROW_USE_CCACHE}" == "ON" ]; then + echo "=== ccache statistics after build ===" + ccache -s +fi echo "=== Copying libraries to the distribution folder ===" mkdir -p "${dist_dir}" -cp -L ${build_dir}/cpp/lib/libgandiva_jni.dylib ${dist_dir} -cp -L ${build_dir}/cpp/lib/libarrow_dataset_jni.dylib ${dist_dir} -cp -L ${build_dir}/cpp/lib/libarrow_orc_jni.dylib ${dist_dir} +cp -L ${install_dir}/lib/libarrow_dataset_jni.dylib ${dist_dir} +cp -L ${install_dir}/lib/libarrow_orc_jni.dylib ${dist_dir} +cp -L ${install_dir}/lib/libgandiva_jni.dylib ${dist_dir} +cp -L ${build_dir}/cpp/*/libplasma_java.dylib ${dist_dir} echo 
"=== Checking shared dependencies for libraries ===" @@ -122,9 +135,11 @@ archery linking check-dependencies \ --allow libcurl \ --allow libgandiva_jni \ --allow libncurses \ + --allow libplasma_java \ --allow libz \ libarrow_cdata_jni.dylib \ libarrow_dataset_jni.dylib \ libarrow_orc_jni.dylib \ - libgandiva_jni.dylib + libgandiva_jni.dylib \ + libplasma_java.dylib popd diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh index 008f19140ee0e..331d74b34a1f4 100755 --- a/ci/scripts/java_jni_manylinux_build.sh +++ b/ci/scripts/java_jni_manylinux_build.sh @@ -32,6 +32,7 @@ echo "=== Building Arrow C++ libraries ===" devtoolset_version=$(rpm -qa "devtoolset-*-gcc" --queryformat %{VERSION} | \ grep -o "^[0-9]*") devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}" +: ${ARROW_BUILD_TESTS:=OFF} : ${ARROW_DATASET:=ON} : ${ARROW_GANDIVA:=ON} : ${ARROW_GANDIVA_JAVA:=ON} @@ -44,7 +45,7 @@ devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/includ : ${ARROW_PLASMA_JAVA_CLIENT:=ON} : ${ARROW_PYTHON:=OFF} : ${ARROW_S3:=ON} -: ${ARROW_BUILD_TESTS:=OFF} +: ${ARROW_USE_CCACHE:=OFF} : ${CMAKE_BUILD_TYPE:=Release} : ${CMAKE_UNITY_BUILD:=ON} : ${VCPKG_ROOT:=/opt/vcpkg} @@ -52,6 +53,11 @@ devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/includ : ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}} : ${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/x86_64-redhat-linux;-isystem;-lpthread} +if [ "${ARROW_USE_CCACHE}" == "ON" ]; then + echo "=== ccache statistics before build ===" + ccache -s +fi + export ARROW_TEST_DATA="${arrow_dir}/testing/data" export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data" export AWS_EC2_METADATA_DISABLED=TRUE @@ -87,11 +93,12 @@ cmake \ -DARROW_S3=${ARROW_S3} \ -DARROW_SNAPPY_USE_SHARED=OFF \ -DARROW_THRIFT_USE_SHARED=OFF \ + -DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \ -DARROW_UTF8PROC_USE_SHARED=OFF \ -DARROW_ZSTD_USE_SHARED=OFF \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -DCMAKE_INSTALL_LIBDIR=lib \ - -DCMAKE_INSTALL_PREFIX=${build_dir}/cpp \ + -DCMAKE_INSTALL_PREFIX=${ARROW_HOME} \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \ -DORC_SOURCE=BUNDLED \ -DORC_PROTOBUF_EXECUTABLE=${VCPKG_ROOT}/installed/${VCPKG_TARGET_TRIPLET}/tools/protobuf/protoc \ @@ -126,11 +133,17 @@ ${arrow_dir}/ci/scripts/java_jni_build.sh \ ${build_dir} \ ${dist_dir} +if [ "${ARROW_USE_CCACHE}" == "ON" ]; then + echo "=== ccache statistics after build ===" + ccache -s +fi + echo "=== Copying libraries to the distribution folder ===" -cp -L ${build_dir}/cpp/lib/libgandiva_jni.so ${dist_dir} -cp -L ${build_dir}/cpp/lib/libarrow_dataset_jni.so ${dist_dir} -cp -L ${build_dir}/cpp/lib/libarrow_orc_jni.so ${dist_dir} +cp -L ${ARROW_HOME}/lib/libarrow_dataset_jni.so ${dist_dir} +cp -L ${ARROW_HOME}/lib/libarrow_orc_jni.so ${dist_dir} +cp -L ${ARROW_HOME}/lib/libgandiva_jni.so ${dist_dir} +cp -L ${build_dir}/cpp/*/libplasma_java.so ${dist_dir} echo "=== Checking shared dependencies for libraries ===" @@ -149,5 +162,6 @@ archery linking check-dependencies \ libarrow_cdata_jni.so \ libarrow_dataset_jni.so \ libarrow_orc_jni.so \ - libgandiva_jni.so + libgandiva_jni.so \ + libplasma_java.so popd diff --git a/ci/scripts/java_test.sh b/ci/scripts/java_test.sh index 83ef26fdb1a51..bb30894d9eff0 100755 --- a/ci/scripts/java_test.sh +++ b/ci/scripts/java_test.sh @@ -18,9 +18,12 @@ set -ex +if [[ 
"${ARROW_JAVA_TEST:-ON}" != "ON" ]]; then + exit +fi + arrow_dir=${1} source_dir=${1}/java -cpp_build_dir=${2}/cpp/${ARROW_BUILD_TYPE:-debug} java_jni_dist_dir=${3} # For JNI and Plasma tests @@ -36,7 +39,7 @@ pushd ${source_dir} ${mvn} test if [ "${ARROW_JNI}" = "ON" ]; then - ${mvn} test -Parrow-jni -pl adapter/orc,gandiva,dataset -Darrow.cpp.build.dir=${cpp_build_dir} + ${mvn} test -Parrow-jni -pl adapter/orc,gandiva,dataset -Darrow.cpp.build.dir=${java_jni_dist_dir} fi if [ "${ARROW_JAVA_CDATA}" = "ON" ]; then @@ -46,7 +49,7 @@ fi if [ "${ARROW_PLASMA}" = "ON" ]; then pushd ${source_dir}/plasma java -cp target/test-classes:target/classes \ - -Djava.library.path=${cpp_build_dir} \ + -Djava.library.path=${java_jni_dist_dir} \ org.apache.arrow.plasma.PlasmaClientTest popd fi diff --git a/ci/scripts/msys2_setup.sh b/ci/scripts/msys2_setup.sh index fba0fa26045da..0514bdcef7d07 100755 --- a/ci/scripts/msys2_setup.sh +++ b/ci/scripts/msys2_setup.sh @@ -27,19 +27,23 @@ case "${target}" in packages+=(${MINGW_PACKAGE_PREFIX}-aws-sdk-cpp) packages+=(${MINGW_PACKAGE_PREFIX}-boost) packages+=(${MINGW_PACKAGE_PREFIX}-brotli) + packages+=(${MINGW_PACKAGE_PREFIX}-bzip2) + packages+=(${MINGW_PACKAGE_PREFIX}-c-ares) + packages+=(${MINGW_PACKAGE_PREFIX}-cc) packages+=(${MINGW_PACKAGE_PREFIX}-ccache) packages+=(${MINGW_PACKAGE_PREFIX}-clang) packages+=(${MINGW_PACKAGE_PREFIX}-cmake) - packages+=(${MINGW_PACKAGE_PREFIX}-gcc) + packages+=(${MINGW_PACKAGE_PREFIX}-double-conversion) + packages+=(${MINGW_PACKAGE_PREFIX}-flatbuffers) packages+=(${MINGW_PACKAGE_PREFIX}-gflags) packages+=(${MINGW_PACKAGE_PREFIX}-grpc) packages+=(${MINGW_PACKAGE_PREFIX}-gtest) packages+=(${MINGW_PACKAGE_PREFIX}-libutf8proc) packages+=(${MINGW_PACKAGE_PREFIX}-libxml2) packages+=(${MINGW_PACKAGE_PREFIX}-lz4) - packages+=(${MINGW_PACKAGE_PREFIX}-make) packages+=(${MINGW_PACKAGE_PREFIX}-ninja) packages+=(${MINGW_PACKAGE_PREFIX}-nlohmann-json) + packages+=(${MINGW_PACKAGE_PREFIX}-openssl) packages+=(${MINGW_PACKAGE_PREFIX}-protobuf) packages+=(${MINGW_PACKAGE_PREFIX}-python-cffi) packages+=(${MINGW_PACKAGE_PREFIX}-python-numpy) @@ -50,6 +54,7 @@ case "${target}" in packages+=(${MINGW_PACKAGE_PREFIX}-snappy) packages+=(${MINGW_PACKAGE_PREFIX}-thrift) packages+=(${MINGW_PACKAGE_PREFIX}-xsimd) + packages+=(${MINGW_PACKAGE_PREFIX}-uriparser) packages+=(${MINGW_PACKAGE_PREFIX}-zlib) packages+=(${MINGW_PACKAGE_PREFIX}-zstd) ;; diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh index 4e2990b84d6f5..a18bc5b9133ed 100755 --- a/ci/scripts/python_test.sh +++ b/ci/scripts/python_test.sh @@ -20,6 +20,7 @@ set -ex arrow_dir=${1} +test_dir=${1}/python/build/dist export ARROW_SOURCE_DIR=${arrow_dir} export ARROW_TEST_DATA=${arrow_dir}/testing/data @@ -54,4 +55,14 @@ export PYARROW_TEST_ORC export PYARROW_TEST_PARQUET export PYARROW_TEST_S3 -pytest -r s -v ${PYTEST_ARGS} --pyargs pyarrow +# Testing PyArrow C++ +if [ "${ARROW_BUILD_TESTS}" == "ON" ]; then + pushd ${test_dir} + ctest \ + --output-on-failure \ + --parallel ${n_jobs} \ + --timeout 300 + popd +fi +# Testing PyArrow +pytest -r s ${PYTEST_ARGS} --pyargs pyarrow diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index a6e763b6523da..6bd6ea22a35b9 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -156,6 +156,7 @@ export PYARROW_WITH_PLASMA=${ARROW_PLASMA} export PYARROW_WITH_SUBSTRAIT=${ARROW_SUBSTRAIT} export PYARROW_WITH_S3=${ARROW_S3} export 
PYARROW_CMAKE_OPTIONS="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}" +export ARROW_HOME=${build_dir}/install # PyArrow build configuration export PKG_CONFIG_PATH=/usr/lib/pkgconfig:${build_dir}/install/lib/pkgconfig # Set PyArrow version explicitly diff --git a/ci/scripts/python_wheel_manylinux_build.sh b/ci/scripts/python_wheel_manylinux_build.sh index af17606199e6f..47721e45e3b82 100755 --- a/ci/scripts/python_wheel_manylinux_build.sh +++ b/ci/scripts/python_wheel_manylinux_build.sh @@ -151,6 +151,7 @@ export PYARROW_WITH_PARQUET_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION} export PYARROW_WITH_PLASMA=${ARROW_PLASMA} export PYARROW_WITH_SUBSTRAIT=${ARROW_SUBSTRAIT} export PYARROW_WITH_S3=${ARROW_S3} +export ARROW_HOME=/tmp/arrow-dist # PyArrow build configuration export PKG_CONFIG_PATH=/usr/lib/pkgconfig:/tmp/arrow-dist/lib/pkgconfig diff --git a/ci/scripts/r_docker_configure.sh b/ci/scripts/r_docker_configure.sh index 2bc5a4806f51b..f7795c3949234 100755 --- a/ci/scripts/r_docker_configure.sh +++ b/ci/scripts/r_docker_configure.sh @@ -67,25 +67,6 @@ sloppiness = include_file_ctime hash_dir = false" >> ~/.ccache/ccache.conf fi - -# Special hacking to try to reproduce quirks on fedora-clang-devel on CRAN -# which uses a bespoke clang compiled to use libc++ -# https://www.stats.ox.ac.uk/pub/bdr/Rconfig/r-devel-linux-x86_64-fedora-clang -if [ "$RHUB_PLATFORM" = "linux-x86_64-fedora-clang" ]; then - dnf install -y libcxx-devel - sed -i.bak -E -e 's/(CXX1?1? =.*)/\1 -stdlib=libc++/g' $(${R_BIN} RHOME)/etc/Makeconf - rm -rf $(${R_BIN} RHOME)/etc/Makeconf.bak - - sed -i.bak -E -e 's/(\-std=gnu\+\+)/-std=c++/g' $(${R_BIN} RHOME)/etc/Makeconf - rm -rf $(${R_BIN} RHOME)/etc/Makeconf.bak - - sed -i.bak -E -e 's/(CXXFLAGS = )(.*)/\1 -g -O3 -Wall -pedantic -frtti -fPIC/' $(${R_BIN} RHOME)/etc/Makeconf - rm -rf $(${R_BIN} RHOME)/etc/Makeconf.bak - - sed -i.bak -E -e 's/(LDFLAGS =.*)/\1 -stdlib=libc++/g' $(${R_BIN} RHOME)/etc/Makeconf - rm -rf $(${R_BIN} RHOME)/etc/Makeconf.bak -fi - # Special hacking to try to reproduce quirks on centos using non-default build # tooling. 
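
Aside: both wheel-build hunks above now export ARROW_HOME so the PyArrow build can locate the freshly installed Arrow C++ libraries directly, in addition to finding them via pkg-config. A minimal sketch of that environment setup, using the manylinux prefix as an illustrative path:

```bash
# Sketch only; /tmp/arrow-dist is the prefix used by the manylinux script.
# Substitute your own Arrow C++ install prefix for a local build.
export ARROW_HOME=/tmp/arrow-dist
export PKG_CONFIG_PATH=/usr/lib/pkgconfig:${ARROW_HOME}/lib/pkgconfig
# The PyArrow extension build (e.g. `python setup.py build_ext --inplace`
# under python/) then resolves the Arrow C++ libraries from ${ARROW_HOME}.
```
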
if [[ "$DEVTOOLSET_VERSION" -gt 0 ]]; then diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 945ff7b6f89ad..f0c3f30ef3d8d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -47,7 +47,7 @@ if(POLICY CMP0074) cmake_policy(SET CMP0074 NEW) endif() -set(ARROW_VERSION "9.0.0-SNAPSHOT") +set(ARROW_VERSION "10.0.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}") @@ -753,9 +753,9 @@ if(ARROW_WITH_BZ2) endif() if(ARROW_WITH_LZ4) - list(APPEND ARROW_STATIC_LINK_LIBS lz4::lz4) + list(APPEND ARROW_STATIC_LINK_LIBS LZ4::lz4) if(lz4_SOURCE STREQUAL "SYSTEM") - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS lz4::lz4) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS LZ4::lz4) endif() endif() @@ -822,6 +822,17 @@ if(ARROW_WITH_OPENTELEMETRY) opentelemetry-cpp::trace opentelemetry-cpp::ostream_span_exporter opentelemetry-cpp::otlp_http_exporter) + if(opentelemetry_SOURCE STREQUAL "SYSTEM") + list(APPEND + ARROW_STATIC_INSTALL_INTERFACE_LIBS + opentelemetry-cpp::trace + opentelemetry-cpp::ostream_span_exporter + opentelemetry-cpp::otlp_http_exporter) + endif() + if(Protobuf_SOURCE STREQUAL "SYSTEM") + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_PROTOBUF_LIBPROTOBUF}) + endif() + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS CURL::libcurl) endif() if(ARROW_WITH_UTF8PROC) diff --git a/cpp/build-support/run-test.sh b/cpp/build-support/run-test.sh index d2d327cfdddc6..7f68abd31c369 100755 --- a/cpp/build-support/run-test.sh +++ b/cpp/build-support/run-test.sh @@ -109,8 +109,7 @@ function run_test() { # XML output from gtest. We assume that gtest knows better than us and our # regexes in most cases, but for certain errors we delete the resulting xml # file and let our own post-processing step regenerate it. - export GREP=$(which egrep) - if zgrep --silent "ThreadSanitizer|Leak check.*detected leaks" $LOGFILE ; then + if grep -E -q "ThreadSanitizer|Leak check.*detected leaks" $LOGFILE ; then echo ThreadSanitizer or leak check failures in $LOGFILE STATUS=1 rm -f $XMLFILE @@ -157,7 +156,7 @@ function post_process_tests() { # If we have a LeakSanitizer report, and XML reporting is configured, add a new test # case result to the XML file for the leak report. Otherwise Jenkins won't show # us which tests had LSAN errors. - if zgrep --silent "ERROR: LeakSanitizer: detected memory leaks" $LOGFILE ; then + if grep -E -q "ERROR: LeakSanitizer: detected memory leaks" $LOGFILE ; then echo Test had memory leaks. 
Editing XML perl -p -i -e ' if (m##) { diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake index 888ca19af5814..a6adf58718ad5 100644 --- a/cpp/cmake_modules/BuildUtils.cmake +++ b/cpp/cmake_modules/BuildUtils.cmake @@ -399,8 +399,8 @@ function(ADD_ARROW_LIB LIB_NAME) endif() if(ARG_STATIC_LINK_LIBS) - target_link_libraries(${LIB_NAME}_static LINK_PRIVATE - "$") + target_link_libraries(${LIB_NAME}_static + PUBLIC "$") if(USE_OBJLIB) # Ensure that dependencies are built before compilation of objects in # object library, rather than only before the final link step diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index d5590a95ee48d..0dbf4cb843ef0 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -158,6 +158,8 @@ if(ARROW_DEFINE_OPTIONS) define_option(ARROW_GGDB_DEBUG "Pass -ggdb flag to debug builds" ON) + define_option(ARROW_WITH_MUSL "Whether the system libc is musl or not" OFF) + #---------------------------------------------------------------------- set_option_category("Test and benchmark") diff --git a/cpp/cmake_modules/FindArrowPython.cmake b/cpp/cmake_modules/FindArrowPython.cmake index b503e6a9e022c..5acd3dab1d0de 100644 --- a/cpp/cmake_modules/FindArrowPython.cmake +++ b/cpp/cmake_modules/FindArrowPython.cmake @@ -47,7 +47,7 @@ find_package(Arrow ${find_package_arguments}) if(ARROW_FOUND) arrow_find_package(ARROW_PYTHON - "${ARROW_HOME}" + "${PYARROW_CPP_HOME}" arrow_python arrow/python/api.h ArrowPython diff --git a/cpp/cmake_modules/FindArrowPythonFlight.cmake b/cpp/cmake_modules/FindArrowPythonFlight.cmake index 3a639928ce5ef..fb670fa7658f1 100644 --- a/cpp/cmake_modules/FindArrowPythonFlight.cmake +++ b/cpp/cmake_modules/FindArrowPythonFlight.cmake @@ -50,7 +50,7 @@ find_package(ArrowPython ${find_package_arguments}) if(ARROW_PYTHON_FOUND AND ARROW_FLIGHT_FOUND) arrow_find_package(ARROW_PYTHON_FLIGHT - "${ARROW_HOME}" + "${PYARROW_CPP_HOME}" arrow_python_flight arrow/python/flight.h ArrowPythonFlight diff --git a/cpp/cmake_modules/Findlz4Alt.cmake b/cpp/cmake_modules/Findlz4Alt.cmake index 186fec7e40af7..9fe8a31c1d490 100644 --- a/cpp/cmake_modules/Findlz4Alt.cmake +++ b/cpp/cmake_modules/Findlz4Alt.cmake @@ -25,6 +25,10 @@ endif() find_package(lz4 ${find_package_args}) if(lz4_FOUND) set(lz4Alt_FOUND TRUE) + # Conan uses lz4::lz4 not LZ4::lz4 + if(NOT TARGET LZ4::lz4 AND TARGET lz4::lz4) + add_library(LZ4::lz4 ALIAS lz4::lz4) + endif() return() endif() @@ -89,9 +93,9 @@ endif() find_package_handle_standard_args(lz4Alt REQUIRED_VARS LZ4_LIB LZ4_INCLUDE_DIR) if(lz4Alt_FOUND) - if(NOT TARGET lz4::lz4) - add_library(lz4::lz4 UNKNOWN IMPORTED) - set_target_properties(lz4::lz4 + if(NOT TARGET LZ4::lz4) + add_library(LZ4::lz4 UNKNOWN IMPORTED) + set_target_properties(LZ4::lz4 PROPERTIES IMPORTED_LOCATION "${LZ4_LIB}" INTERFACE_INCLUDE_DIRECTORIES "${LZ4_INCLUDE_DIR}") endif() diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 0a40ebe48a94f..bc503145da4ae 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -24,7 +24,9 @@ include(CheckCXXSourceCompiles) message(STATUS "System processor: ${CMAKE_SYSTEM_PROCESSOR}") if(NOT DEFINED ARROW_CPU_FLAG) - if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|ARM64|arm64") + if(CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64|X86|x86|i[3456]86") + set(ARROW_CPU_FLAG "x86") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|ARM64|arm64") set(ARROW_CPU_FLAG 
"armv8") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "armv7") set(ARROW_CPU_FLAG "armv7") @@ -32,8 +34,10 @@ if(NOT DEFINED ARROW_CPU_FLAG) set(ARROW_CPU_FLAG "ppc") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x") set(ARROW_CPU_FLAG "s390x") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64") + set(ARROW_CPU_FLAG "riscv64") else() - set(ARROW_CPU_FLAG "x86") + message(FATAL_ERROR "Unknown system processor") endif() endif() @@ -201,6 +205,24 @@ if(WIN32) # * https://developercommunity.visualstudio.com/content/problem/1249671/stdc17-generates-warning-compiling-windowsh.html set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} /wd5105") + if(ARROW_USE_CCACHE) + foreach(c_flag + CMAKE_CXX_FLAGS + CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_DEBUG + CMAKE_CXX_FLAGS_MINSIZEREL + CMAKE_CXX_FLAGS_RELWITHDEBINFO + CMAKE_C_FLAGS + CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_DEBUG + CMAKE_C_FLAGS_MINSIZEREL + CMAKE_C_FLAGS_RELWITHDEBINFO) + # ccache doesn't work with /Zi. + # See also: https://github.com/ccache/ccache/issues/1040 + string(REPLACE "/Zi" "/Z7" ${c_flag} "${${c_flag}}") + endforeach() + endif() + if(ARROW_USE_STATIC_CRT) foreach(c_flag CMAKE_CXX_FLAGS @@ -213,7 +235,7 @@ if(WIN32) CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) - string(REPLACE "/MD" "-MT" ${c_flag} "${${c_flag}}") + string(REPLACE "/MD" "/MT" ${c_flag} "${${c_flag}}") endforeach() endif() @@ -595,27 +617,42 @@ endif() # For CMAKE_BUILD_TYPE=Debug # -ggdb: Enable gdb debugging # For CMAKE_BUILD_TYPE=FastDebug -# Same as DEBUG, except with some optimizations on. +# Same as Debug, except with some optimizations on. # For CMAKE_BUILD_TYPE=Release -# -O3: Enable all compiler optimizations -# Debug symbols are stripped for reduced binary size. Add -# -DARROW_CXXFLAGS="-g" to add them +# -O2: Enable all compiler optimizations +# Debug symbols are stripped for reduced binary size. +# For CMAKE_BUILD_TYPE=RelWithDebInfo +# Same as Release, except with debug symbols enabled. 
+ if(NOT MSVC) + string(REPLACE "-O3" "" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") + string(REPLACE "-O3" "" CMAKE_CXX_FLAGS_RELWITHDEBINFO + "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") + + set(RELEASE_FLAGS "-O2 -DNDEBUG") + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set(RELEASE_FLAGS "${RELEASE_FLAGS} -ftree-vectorize") + endif() + if(ARROW_GGDB_DEBUG) set(ARROW_DEBUG_SYMBOL_TYPE "gdb") set(C_FLAGS_DEBUG "-g${ARROW_DEBUG_SYMBOL_TYPE} -O0") set(C_FLAGS_FASTDEBUG "-g${ARROW_DEBUG_SYMBOL_TYPE} -O1") + set(C_FLAGS_RELWITHDEBINFO "-g${ARROW_DEBUG_SYMBOL_TYPE} ${RELEASE_FLAGS}") set(CXX_FLAGS_DEBUG "-g${ARROW_DEBUG_SYMBOL_TYPE} -O0") set(CXX_FLAGS_FASTDEBUG "-g${ARROW_DEBUG_SYMBOL_TYPE} -O1") + set(CXX_FLAGS_RELWITHDEBINFO "-g${ARROW_DEBUG_SYMBOL_TYPE} ${RELEASE_FLAGS}") else() set(C_FLAGS_DEBUG "-g -O0") set(C_FLAGS_FASTDEBUG "-g -O1") + set(C_FLAGS_RELWITHDEBINFO "-g ${RELEASE_FLAGS}") set(CXX_FLAGS_DEBUG "-g -O0") set(CXX_FLAGS_FASTDEBUG "-g -O1") + set(CXX_FLAGS_RELWITHDEBINFO "-g ${RELEASE_FLAGS}") endif() - set(C_FLAGS_RELEASE "-O3 -DNDEBUG") - set(CXX_FLAGS_RELEASE "-O3 -DNDEBUG") + set(C_FLAGS_RELEASE "${RELEASE_FLAGS}") + set(CXX_FLAGS_RELEASE "${RELEASE_FLAGS}") endif() set(C_FLAGS_PROFILE_GEN "${CXX_FLAGS_RELEASE} -fprofile-generate") @@ -630,7 +667,8 @@ if("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${C_FLAGS_DEBUG}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_DEBUG}") elseif("${CMAKE_BUILD_TYPE}" STREQUAL "RELWITHDEBINFO") - + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${C_FLAGS_RELWITHDEBINFO}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_RELWITHDEBINFO}") elseif("${CMAKE_BUILD_TYPE}" STREQUAL "FASTDEBUG") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${C_FLAGS_FASTDEBUG}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_FASTDEBUG}") diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 5d1da18b7785b..b904f3b62ece8 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -50,6 +50,7 @@ endif() # Resolve the dependencies set(ARROW_THIRDPARTY_DEPENDENCIES + absl AWSSDK benchmark Boost @@ -152,7 +153,9 @@ foreach(DEPENDENCY ${ARROW_THIRDPARTY_DEPENDENCIES}) endforeach() macro(build_dependency DEPENDENCY_NAME) - if("${DEPENDENCY_NAME}" STREQUAL "AWSSDK") + if("${DEPENDENCY_NAME}" STREQUAL "absl") + build_absl() + elseif("${DEPENDENCY_NAME}" STREQUAL "AWSSDK") build_awssdk() elseif("${DEPENDENCY_NAME}" STREQUAL "benchmark") build_benchmark() @@ -224,7 +227,12 @@ endmacro() macro(resolve_dependency DEPENDENCY_NAME) set(options) - set(one_value_args HAVE_ALT IS_RUNTIME_DEPENDENCY REQUIRED_VERSION USE_CONFIG) + set(one_value_args + FORCE_ANY_NEWER_VERSION + HAVE_ALT + IS_RUNTIME_DEPENDENCY + REQUIRED_VERSION + USE_CONFIG) set(multi_value_args COMPONENTS PC_PACKAGE_NAMES) cmake_parse_arguments(ARG "${options}" @@ -244,7 +252,7 @@ macro(resolve_dependency DEPENDENCY_NAME) set(PACKAGE_NAME ${DEPENDENCY_NAME}) endif() set(FIND_PACKAGE_ARGUMENTS ${PACKAGE_NAME}) - if(ARG_REQUIRED_VERSION) + if(ARG_REQUIRED_VERSION AND NOT ARG_FORCE_ANY_NEWER_VERSION) list(APPEND FIND_PACKAGE_ARGUMENTS ${ARG_REQUIRED_VERSION}) endif() if(ARG_USE_CONFIG) @@ -255,7 +263,16 @@ macro(resolve_dependency DEPENDENCY_NAME) endif() if(${DEPENDENCY_NAME}_SOURCE STREQUAL "AUTO") find_package(${FIND_PACKAGE_ARGUMENTS}) - if(${${PACKAGE_NAME}_FOUND}) + set(COMPATIBLE ${${PACKAGE_NAME}_FOUND}) + if(COMPATIBLE + AND ARG_FORCE_ANY_NEWER_VERSION + AND ARG_REQUIRED_VERSION) + 
if(${${PACKAGE_NAME}_VERSION} VERSION_LESS ${ARG_REQUIRED_VERSION}) + message(DEBUG "Couldn't find ${DEPENDENCY_NAME} >= ${ARG_REQUIRED_VERSION}") + set(COMPATIBLE FALSE) + endif() + endif() + if(COMPATIBLE) set(${DEPENDENCY_NAME}_SOURCE "SYSTEM") else() build_dependency(${DEPENDENCY_NAME}) @@ -265,6 +282,11 @@ macro(resolve_dependency DEPENDENCY_NAME) build_dependency(${DEPENDENCY_NAME}) elseif(${DEPENDENCY_NAME}_SOURCE STREQUAL "SYSTEM") find_package(${FIND_PACKAGE_ARGUMENTS} REQUIRED) + if(ARG_FORCE_ANY_NEWER_VERSION AND ARG_REQUIRED_VERSION) + if(${${PACKAGE_NAME}_VERSION} VERSION_LESS ${ARG_REQUIRED_VERSION}) + message(FATAL_ERROR "Couldn't find ${DEPENDENCY_NAME} >= ${ARG_REQUIRED_VERSION}") + endif() + endif() endif() if(${DEPENDENCY_NAME}_SOURCE STREQUAL "SYSTEM" AND ARG_IS_RUNTIME_DEPENDENCY) provide_find_module(${PACKAGE_NAME}) @@ -1016,12 +1038,16 @@ if(ARROW_USE_BOOST) # Find static boost headers and libs set(Boost_USE_STATIC_LIBS ON) endif() + if(ARROW_BOOST_REQUIRE_LIBRARY) + set(ARROW_BOOST_COMPONENTS system filesystem) + else() + set(ARROW_BOOST_COMPONENTS) + endif() resolve_dependency(Boost REQUIRED_VERSION ${ARROW_BOOST_REQUIRED_VERSION} COMPONENTS - system - filesystem + ${ARROW_BOOST_COMPONENTS} IS_RUNTIME_DEPENDENCY # libarrow.so doesn't depend on libboost*. FALSE) @@ -1080,6 +1106,7 @@ endif() macro(find_curl) if(NOT TARGET CURL::libcurl) find_package(CURL REQUIRED) + list(APPEND ARROW_SYSTEM_DEPENDENCIES CURL) if(NOT TARGET CURL::libcurl) # For CMake 3.11 or older add_library(CURL::libcurl UNKNOWN IMPORTED) @@ -1700,11 +1727,14 @@ if(ARROW_WITH_PROTOBUF) get_target_property(PROTOBUF_PROTOC_EXECUTABLE ${ARROW_PROTOBUF_PROTOC} IMPORTED_LOCATION) message(STATUS "Found protoc: ${PROTOBUF_PROTOC_EXECUTABLE}") - # Protobuf_PROTOC_LIBRARY is set by all versions of FindProtobuf.cmake - message(STATUS "Found libprotoc: ${Protobuf_PROTOC_LIBRARY}") - get_target_property(PROTOBUF_LIBRARY ${ARROW_PROTOBUF_LIBPROTOBUF} IMPORTED_LOCATION) - message(STATUS "Found libprotobuf: ${PROTOBUF_LIBRARY}") - message(STATUS "Found protobuf headers: ${PROTOBUF_INCLUDE_DIR}") + get_target_property(PROTOBUF_TYPE ${ARROW_PROTOBUF_LIBPROTOBUF} TYPE) + if(NOT STREQUAL "INTERFACE_LIBRARY") + # Protobuf_PROTOC_LIBRARY is set by all versions of FindProtobuf.cmake + message(STATUS "Found libprotoc: ${Protobuf_PROTOC_LIBRARY}") + get_target_property(PROTOBUF_LIBRARY ${ARROW_PROTOBUF_LIBPROTOBUF} IMPORTED_LOCATION) + message(STATUS "Found libprotobuf: ${PROTOBUF_LIBRARY}") + message(STATUS "Found protobuf headers: ${PROTOBUF_INCLUDE_DIR}") + endif() endif() # ---------------------------------------------------------------------- @@ -2242,7 +2272,11 @@ else() endif() if(ARROW_USE_XSIMD) - resolve_dependency(xsimd REQUIRED_VERSION "8.1.0") + resolve_dependency(xsimd + REQUIRED_VERSION + "8.1.0" + FORCE_ANY_NEWER_VERSION + TRUE) if(xsimd_SOURCE STREQUAL "BUNDLED") add_library(xsimd INTERFACE IMPORTED) @@ -2300,53 +2334,44 @@ if(ARROW_WITH_ZLIB) endif() macro(build_lz4) - message(STATUS "Building lz4 from source") - set(LZ4_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/lz4_ep-prefix/src/lz4_ep") - set(LZ4_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/lz4_ep-prefix") - - if(MSVC) - if(ARROW_USE_STATIC_CRT) - if(${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG") - set(LZ4_RUNTIME_LIBRARY_LINKAGE "/p:RuntimeLibrary=MultiThreadedDebug") - else() - set(LZ4_RUNTIME_LIBRARY_LINKAGE "/p:RuntimeLibrary=MultiThreaded") - endif() - endif() - set(LZ4_STATIC_LIB - "${LZ4_BUILD_DIR}/build/VS2010/bin/x64_${CMAKE_BUILD_TYPE}/liblz4_static.lib") - 
set(LZ4_BUILD_COMMAND - BUILD_COMMAND msbuild.exe /m /p:Configuration=${CMAKE_BUILD_TYPE} /p:Platform=x64 - /p:PlatformToolset=v140 ${LZ4_RUNTIME_LIBRARY_LINKAGE} /t:Build - ${LZ4_BUILD_DIR}/build/VS2010/lz4.sln) - else() - set(LZ4_STATIC_LIB "${LZ4_BUILD_DIR}/lib/liblz4.a") - # Must explicitly invoke sh on MinGW - set(LZ4_BUILD_COMMAND - BUILD_COMMAND sh "${CMAKE_CURRENT_SOURCE_DIR}/build-support/build-lz4-lib.sh" - "AR=${CMAKE_AR}" "OS=${CMAKE_SYSTEM_NAME}") + message(STATUS "Building LZ4 from source") + if(CMAKE_VERSION VERSION_LESS 3.7) + message(FATAL_ERROR "Building LZ4 using ExternalProject requires at least CMake 3.7") endif() + set(LZ4_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/lz4_ep-install") + + set(LZ4_STATIC_LIB + "${LZ4_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}lz4${CMAKE_STATIC_LIBRARY_SUFFIX}") + + set(LZ4_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + -DBUILD_SHARED_LIBS=OFF + -DBUILD_STATIC_LIBS=ON + -DCMAKE_INSTALL_LIBDIR=lib + -DCMAKE_INSTALL_PREFIX= + -DLZ4_BUILD_CLI=OFF + -DLZ4_BUILD_LEGACY_LZ4C=OFF) + # We need to copy the header in lib to directory outside of the build externalproject_add(lz4_ep - URL ${LZ4_SOURCE_URL} ${EP_LOG_OPTIONS} + ${EP_LOG_OPTIONS} + CMAKE_ARGS ${LZ4_CMAKE_ARGS} + SOURCE_SUBDIR "build/cmake" + INSTALL_DIR ${LZ4_PREFIX} + URL ${LZ4_SOURCE_URL} URL_HASH "SHA256=${ARROW_LZ4_BUILD_SHA256_CHECKSUM}" - UPDATE_COMMAND ${CMAKE_COMMAND} -E copy_directory - "${LZ4_BUILD_DIR}/lib" "${LZ4_PREFIX}/include" - ${LZ4_PATCH_COMMAND} - CONFIGURE_COMMAND "" - INSTALL_COMMAND "" - BINARY_DIR ${LZ4_BUILD_DIR} - BUILD_BYPRODUCTS ${LZ4_STATIC_LIB} ${LZ4_BUILD_COMMAND}) + BUILD_BYPRODUCTS ${LZ4_STATIC_LIB}) file(MAKE_DIRECTORY "${LZ4_PREFIX}/include") - add_library(lz4::lz4 STATIC IMPORTED) - set_target_properties(lz4::lz4 + add_library(LZ4::lz4 STATIC IMPORTED) + set_target_properties(LZ4::lz4 PROPERTIES IMPORTED_LOCATION "${LZ4_STATIC_LIB}" INTERFACE_INCLUDE_DIRECTORIES "${LZ4_PREFIX}/include") add_dependencies(toolchain lz4_ep) - add_dependencies(lz4::lz4 lz4_ep) + add_dependencies(LZ4::lz4 lz4_ep) - list(APPEND ARROW_BUNDLED_STATIC_LIBS lz4::lz4) + list(APPEND ARROW_BUNDLED_STATIC_LIBS LZ4::lz4) endmacro() if(ARROW_WITH_LZ4) @@ -2358,7 +2383,12 @@ if(ARROW_WITH_LZ4) endif() macro(build_zstd) - message(STATUS "Building zstd from source") + message(STATUS "Building Zstandard from source") + if(CMAKE_VERSION VERSION_LESS 3.7) + message(FATAL_ERROR "Building Zstandard using ExternalProject requires at least CMake 3.7" + ) + endif() + set(ZSTD_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zstd_ep-install") set(ZSTD_CMAKE_ARGS @@ -2388,10 +2418,6 @@ macro(build_zstd) -DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}) endif() - if(CMAKE_VERSION VERSION_LESS 3.7) - message(FATAL_ERROR "Building zstd using ExternalProject requires at least CMake 3.7") - endif() - externalproject_add(zstd_ep ${EP_LOG_OPTIONS} CMAKE_ARGS ${ZSTD_CMAKE_ARGS} @@ -2656,948 +2682,966 @@ endmacro() # ---------------------------------------------------------------------- # Dependencies for Arrow Flight RPC -macro(resolve_dependency_absl) - # Choose one of built absl::* targets - if(NOT TARGET absl::algorithm) - message(STATUS "Building Abseil-cpp from source") - set(ABSL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/absl_ep-install") - set(ABSL_INCLUDE_DIR "${ABSL_PREFIX}/include") - set(ABSL_CMAKE_ARGS - "${EP_COMMON_CMAKE_ARGS}" -DABSL_RUN_TESTS=OFF -DCMAKE_INSTALL_LIBDIR=lib - "-DCMAKE_INSTALL_PREFIX=${ABSL_PREFIX}") - set(ABSL_BUILD_BYPRODUCTS) - set(ABSL_LIBRARIES) - - # Abseil produces the following libraries, each is fairly small, but 
there - # are (as you can see), many of them. We need to add the libraries first, - # and then describe how they depend on each other. The list can be - # refreshed using: - # ls -1 $PREFIX/lib/libabsl_*.a | sed -e 's/.*libabsl_//' -e 's/.a$//' - set(_ABSL_LIBS - bad_any_cast_impl - bad_optional_access - bad_variant_access - base - city - civil_time - cord - cord_internal - cordz_functions - cordz_handle - cordz_info - cordz_sample_token - debugging_internal - demangle_internal - examine_stack - exponential_biased - failure_signal_handler - flags - flags_commandlineflag - flags_commandlineflag_internal - flags_config - flags_internal - flags_marshalling - flags_parse - flags_private_handle_accessor - flags_program_name - flags_reflection - flags_usage - flags_usage_internal - graphcycles_internal - hash - hashtablez_sampler - int128 - leak_check - leak_check_disable - log_severity - low_level_hash - malloc_internal - periodic_sampler - random_distributions - random_internal_distribution_test_util - random_internal_platform - random_internal_pool_urbg - random_internal_randen - random_internal_randen_hwaes - random_internal_randen_hwaes_impl - random_internal_randen_slow - random_internal_seed_material - random_seed_gen_exception - random_seed_sequences - raw_hash_set - raw_logging_internal - scoped_set_env - spinlock_wait - stacktrace - status - statusor - str_format_internal - strerror - strings - strings_internal - symbolize - synchronization - throw_delegate - time - time_zone - wyhash) - # Abseil creates a number of header-only targets, which are needed to resolve dependencies. - # The list can be refreshed using: - # comm -13 <(ls -l $PREFIX/lib/libabsl_*.a | sed -e 's/.*libabsl_//' -e 's/.a$//' | sort -u) \ - # <(ls -1 $PREFIX/lib/pkgconfig/absl_*.pc | sed -e 's/.*absl_//' -e 's/.pc$//' | sort -u) - set(_ABSL_INTERFACE_LIBS - algorithm - algorithm_container - any - atomic_hook - bad_any_cast - base_internal - bind_front - bits - btree - cleanup - cleanup_internal - compare - compressed_tuple - config - container_common - container_memory - cordz_statistics - cordz_update_scope - cordz_update_tracker - core_headers - counting_allocator - debugging - dynamic_annotations - endian - errno_saver - fast_type_id - fixed_array - flags_path_util - flat_hash_map - flat_hash_set - function_ref - hash_function_defaults - hash_policy_traits - hashtable_debug - hashtable_debug_hooks - have_sse - inlined_vector - inlined_vector_internal - kernel_timeout_internal - layout - memory - meta - node_hash_map - node_hash_policy - node_hash_set - numeric - numeric_representation - optional - pretty_function - random_bit_gen_ref - random_internal_distribution_caller - random_internal_fast_uniform_bits - random_internal_fastmath - random_internal_generate_real - random_internal_iostream_state_saver - random_internal_mock_helpers - random_internal_nonsecure_base - random_internal_pcg_engine - random_internal_randen_engine - random_internal_salted_seed_seq - random_internal_traits - random_internal_uniform_helper - random_internal_wide_multiply - random_random - raw_hash_map - sample_recorder - span - str_format - type_traits - utility - variant) - - foreach(_ABSL_LIB ${_ABSL_LIBS}) - set(_ABSL_STATIC_LIBRARY - "${ABSL_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}absl_${_ABSL_LIB}${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) - add_library(absl::${_ABSL_LIB} STATIC IMPORTED) - set_target_properties(absl::${_ABSL_LIB} - PROPERTIES IMPORTED_LOCATION ${_ABSL_STATIC_LIBRARY} - INTERFACE_INCLUDE_DIRECTORIES - 
"${ABSL_INCLUDE_DIR}") - list(APPEND ABSL_BUILD_BYPRODUCTS ${_ABSL_STATIC_LIBRARY}) - endforeach() - foreach(_ABSL_LIB ${_ABSL_INTERFACE_LIBS}) - add_library(absl::${_ABSL_LIB} INTERFACE IMPORTED) - set_target_properties(absl::${_ABSL_LIB} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES - "${ABSL_INCLUDE_DIR}") - endforeach() +macro(ensure_absl) + if(NOT absl_FOUND) + if(${absl_SOURCE} STREQUAL "AUTO") + # We can't use resolve_dependency(absl 20211102) to use Abseil + # 20211102 or later because Abseil's CMake package uses "EXACT" + # version match strategy. Our CMake configuration will work with + # Abseil LTS 20211102 or later. So we want to accept Abseil LTS + # 20211102 or later. We need to update + # ARROW_ABSL_REQUIRED_LTS_VERSIONS list when new Abseil LTS is + # released. + set(ARROW_ABSL_REQUIRED_LTS_VERSIONS 20211102 20220623) + foreach(_VERSION ${ARROW_ABSL_REQUIRED_LTS_VERSIONS}) + find_package(absl ${_VERSION}) + if(absl_FOUND) + break() + endif() + endforeach() + # If we can't find Abseil LTS 20211102 or later, we use bundled + # Abseil. + if(NOT absl_FOUND) + set(absl_SOURCE "BUNDLED") + endif() + endif() + resolve_dependency(absl) + endif() +endmacro() - # Extracted the dependency information using the Abseil pkg-config files: - # grep Requires $PREFIX/lib/pkgconfig/absl_*.pc | \ - # sed -e 's;.*/absl_;set_property(TARGET absl::;' \ - # -e 's/.pc:Requires:/ PROPERTY INTERFACE_LINK_LIBRARIES /' \ - # -E -e 's/ = 20[0-9]{6},?//g' \ - # -e 's/absl_/absl::/g' \ - # -e 's/$/)/' | \ - # grep -v 'INTERFACE_LINK_LIBRARIES[ ]*)' - set_property(TARGET absl::algorithm PROPERTY INTERFACE_LINK_LIBRARIES absl::config) - set_property(TARGET absl::algorithm_container - PROPERTY INTERFACE_LINK_LIBRARIES absl::algorithm absl::core_headers - absl::meta) - set_property(TARGET absl::any - PROPERTY INTERFACE_LINK_LIBRARIES - absl::bad_any_cast - absl::config - absl::core_headers - absl::fast_type_id - absl::type_traits - absl::utility) - set_property(TARGET absl::atomic_hook PROPERTY INTERFACE_LINK_LIBRARIES absl::config - absl::core_headers) - set_property(TARGET absl::bad_any_cast PROPERTY INTERFACE_LINK_LIBRARIES - absl::bad_any_cast_impl absl::config) - set_property(TARGET absl::bad_any_cast_impl - PROPERTY INTERFACE_LINK_LIBRARIES absl::config - absl::raw_logging_internal) - set_property(TARGET absl::bad_optional_access - PROPERTY INTERFACE_LINK_LIBRARIES absl::config - absl::raw_logging_internal) - set_property(TARGET absl::bad_variant_access - PROPERTY INTERFACE_LINK_LIBRARIES absl::config - absl::raw_logging_internal) - set_property(TARGET absl::base - PROPERTY INTERFACE_LINK_LIBRARIES - absl::atomic_hook - absl::base_internal - absl::config - absl::core_headers - absl::dynamic_annotations - absl::log_severity - absl::raw_logging_internal - absl::spinlock_wait - absl::type_traits) - set_property(TARGET absl::base_internal PROPERTY INTERFACE_LINK_LIBRARIES - absl::config absl::type_traits) - set_property(TARGET absl::bind_front - PROPERTY INTERFACE_LINK_LIBRARIES absl::base_internal - absl::compressed_tuple) - set_property(TARGET absl::bits PROPERTY INTERFACE_LINK_LIBRARIES absl::core_headers) - set_property(TARGET absl::btree - PROPERTY INTERFACE_LINK_LIBRARIES - absl::container_common - absl::compare - absl::compressed_tuple - absl::container_memory - absl::cord - absl::core_headers - absl::layout - absl::memory - absl::strings - absl::throw_delegate - absl::type_traits - absl::utility) - set_property(TARGET absl::city PROPERTY INTERFACE_LINK_LIBRARIES absl::config - absl::core_headers 
absl::endian) - set_property(TARGET absl::cleanup - PROPERTY INTERFACE_LINK_LIBRARIES absl::cleanup_internal absl::config - absl::core_headers) - set_property(TARGET absl::cleanup_internal - PROPERTY INTERFACE_LINK_LIBRARIES absl::base_internal absl::core_headers - absl::utility) - set_property(TARGET absl::compare PROPERTY INTERFACE_LINK_LIBRARIES - absl::core_headers absl::type_traits) - set_property(TARGET absl::compressed_tuple PROPERTY INTERFACE_LINK_LIBRARIES - absl::utility) - set_property(TARGET absl::container_common PROPERTY INTERFACE_LINK_LIBRARIES - absl::type_traits) - set_property(TARGET absl::container_memory - PROPERTY INTERFACE_LINK_LIBRARIES - absl::config - absl::memory - absl::type_traits - absl::utility) - set_property(TARGET absl::cord - PROPERTY INTERFACE_LINK_LIBRARIES - absl::base - absl::config - absl::cord_internal - absl::cordz_functions - absl::cordz_info - absl::cordz_update_scope - absl::cordz_update_tracker - absl::core_headers - absl::endian - absl::fixed_array - absl::function_ref - absl::inlined_vector - absl::optional - absl::raw_logging_internal - absl::strings - absl::type_traits) - set_property(TARGET absl::cord_internal - PROPERTY INTERFACE_LINK_LIBRARIES - absl::base_internal - absl::compressed_tuple - absl::config - absl::core_headers - absl::endian - absl::inlined_vector - absl::layout - absl::raw_logging_internal - absl::strings - absl::throw_delegate - absl::type_traits) - set_property(TARGET absl::cordz_functions - PROPERTY INTERFACE_LINK_LIBRARIES - absl::config - absl::core_headers - absl::exponential_biased - absl::raw_logging_internal) - set_property(TARGET absl::cordz_handle - PROPERTY INTERFACE_LINK_LIBRARIES - absl::base - absl::config - absl::raw_logging_internal - absl::synchronization) - set_property(TARGET absl::cordz_info - PROPERTY INTERFACE_LINK_LIBRARIES - absl::base - absl::config - absl::cord_internal - absl::cordz_functions - absl::cordz_handle - absl::cordz_statistics - absl::cordz_update_tracker - absl::core_headers - absl::inlined_vector - absl::span - absl::raw_logging_internal - absl::stacktrace - absl::synchronization) - set_property(TARGET absl::cordz_sample_token - PROPERTY INTERFACE_LINK_LIBRARIES absl::config absl::cordz_handle - absl::cordz_info) - set_property(TARGET absl::cordz_statistics - PROPERTY INTERFACE_LINK_LIBRARIES - absl::config - absl::core_headers - absl::cordz_update_tracker - absl::synchronization) - set_property(TARGET absl::cordz_update_scope - PROPERTY INTERFACE_LINK_LIBRARIES - absl::config - absl::cord_internal - absl::cordz_info - absl::cordz_update_tracker - absl::core_headers) - set_property(TARGET absl::cordz_update_tracker PROPERTY INTERFACE_LINK_LIBRARIES - absl::config) - set_property(TARGET absl::core_headers PROPERTY INTERFACE_LINK_LIBRARIES absl::config) - set_property(TARGET absl::counting_allocator PROPERTY INTERFACE_LINK_LIBRARIES +macro(build_absl) + message(STATUS "Building Abseil-cpp from source") + set(absl_FOUND TRUE) + set(absl_VERSION ${ARROW_ABSL_BUILD_VERSION}) + set(ABSL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/absl_ep-install") + set(ABSL_INCLUDE_DIR "${ABSL_PREFIX}/include") + set(ABSL_CMAKE_ARGS "${EP_COMMON_CMAKE_ARGS}" -DABSL_RUN_TESTS=OFF + -DCMAKE_INSTALL_LIBDIR=lib "-DCMAKE_INSTALL_PREFIX=${ABSL_PREFIX}") + set(ABSL_BUILD_BYPRODUCTS) + set(ABSL_LIBRARIES) + + # Abseil produces the following libraries, each is fairly small, but there + # are (as you can see), many of them. We need to add the libraries first, + # and then describe how they depend on each other. 
The list can be + # refreshed using: + # ls -1 $PREFIX/lib/libabsl_*.a | sed -e 's/.*libabsl_//' -e 's/.a$//' + set(_ABSL_LIBS + bad_any_cast_impl + bad_optional_access + bad_variant_access + base + city + civil_time + cord + cord_internal + cordz_functions + cordz_handle + cordz_info + cordz_sample_token + debugging_internal + demangle_internal + examine_stack + exponential_biased + failure_signal_handler + flags + flags_commandlineflag + flags_commandlineflag_internal + flags_config + flags_internal + flags_marshalling + flags_parse + flags_private_handle_accessor + flags_program_name + flags_reflection + flags_usage + flags_usage_internal + graphcycles_internal + hash + hashtablez_sampler + int128 + leak_check + leak_check_disable + log_severity + low_level_hash + malloc_internal + periodic_sampler + random_distributions + random_internal_distribution_test_util + random_internal_platform + random_internal_pool_urbg + random_internal_randen + random_internal_randen_hwaes + random_internal_randen_hwaes_impl + random_internal_randen_slow + random_internal_seed_material + random_seed_gen_exception + random_seed_sequences + raw_hash_set + raw_logging_internal + scoped_set_env + spinlock_wait + stacktrace + status + statusor + str_format_internal + strerror + strings + strings_internal + symbolize + synchronization + throw_delegate + time + time_zone + wyhash) + # Abseil creates a number of header-only targets, which are needed to resolve dependencies. + # The list can be refreshed using: + # comm -13 <(ls -l $PREFIX/lib/libabsl_*.a | sed -e 's/.*libabsl_//' -e 's/.a$//' | sort -u) \ + # <(ls -1 $PREFIX/lib/pkgconfig/absl_*.pc | sed -e 's/.*absl_//' -e 's/.pc$//' | sort -u) + set(_ABSL_INTERFACE_LIBS + algorithm + algorithm_container + any + atomic_hook + bad_any_cast + base_internal + bind_front + bits + btree + cleanup + cleanup_internal + compare + compressed_tuple + config + container_common + container_memory + cordz_statistics + cordz_update_scope + cordz_update_tracker + core_headers + counting_allocator + debugging + dynamic_annotations + endian + errno_saver + fast_type_id + fixed_array + flags_path_util + flat_hash_map + flat_hash_set + function_ref + hash_function_defaults + hash_policy_traits + hashtable_debug + hashtable_debug_hooks + have_sse + inlined_vector + inlined_vector_internal + kernel_timeout_internal + layout + memory + meta + node_hash_map + node_hash_policy + node_hash_set + numeric + numeric_representation + optional + pretty_function + random_bit_gen_ref + random_internal_distribution_caller + random_internal_fast_uniform_bits + random_internal_fastmath + random_internal_generate_real + random_internal_iostream_state_saver + random_internal_mock_helpers + random_internal_nonsecure_base + random_internal_pcg_engine + random_internal_randen_engine + random_internal_salted_seed_seq + random_internal_traits + random_internal_uniform_helper + random_internal_wide_multiply + random_random + raw_hash_map + sample_recorder + span + str_format + type_traits + utility + variant) + + foreach(_ABSL_LIB ${_ABSL_LIBS}) + set(_ABSL_STATIC_LIBRARY + "${ABSL_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}absl_${_ABSL_LIB}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + add_library(absl::${_ABSL_LIB} STATIC IMPORTED) + set_target_properties(absl::${_ABSL_LIB} + PROPERTIES IMPORTED_LOCATION ${_ABSL_STATIC_LIBRARY} + INTERFACE_INCLUDE_DIRECTORIES "${ABSL_INCLUDE_DIR}") + list(APPEND ABSL_BUILD_BYPRODUCTS ${_ABSL_STATIC_LIBRARY}) + endforeach() + foreach(_ABSL_LIB ${_ABSL_INTERFACE_LIBS}) + 
add_library(absl::${_ABSL_LIB} INTERFACE IMPORTED) + set_target_properties(absl::${_ABSL_LIB} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES + "${ABSL_INCLUDE_DIR}") + endforeach() + + # Extracted the dependency information using the Abseil pkg-config files: + # grep Requires $PREFIX/lib/pkgconfig/absl_*.pc | \ + # sed -e 's;.*/absl_;set_property(TARGET absl::;' \ + # -e 's/.pc:Requires:/ PROPERTY INTERFACE_LINK_LIBRARIES /' \ + # -E -e 's/ = 20[0-9]{6},?//g' \ + # -e 's/absl_/absl::/g' \ + # -e 's/$/)/' | \ + # grep -v 'INTERFACE_LINK_LIBRARIES[ ]*)' + set_property(TARGET absl::algorithm PROPERTY INTERFACE_LINK_LIBRARIES absl::config) + set_property(TARGET absl::algorithm_container + PROPERTY INTERFACE_LINK_LIBRARIES absl::algorithm absl::core_headers + absl::meta) + set_property(TARGET absl::any + PROPERTY INTERFACE_LINK_LIBRARIES + absl::bad_any_cast + absl::config + absl::core_headers + absl::fast_type_id + absl::type_traits + absl::utility) + set_property(TARGET absl::atomic_hook PROPERTY INTERFACE_LINK_LIBRARIES absl::config + absl::core_headers) + set_property(TARGET absl::bad_any_cast PROPERTY INTERFACE_LINK_LIBRARIES + absl::bad_any_cast_impl absl::config) + set_property(TARGET absl::bad_any_cast_impl + PROPERTY INTERFACE_LINK_LIBRARIES absl::config absl::raw_logging_internal) + set_property(TARGET absl::bad_optional_access + PROPERTY INTERFACE_LINK_LIBRARIES absl::config absl::raw_logging_internal) + set_property(TARGET absl::bad_variant_access + PROPERTY INTERFACE_LINK_LIBRARIES absl::config absl::raw_logging_internal) + set_property(TARGET absl::base + PROPERTY INTERFACE_LINK_LIBRARIES + absl::atomic_hook + absl::base_internal + absl::config + absl::core_headers + absl::dynamic_annotations + absl::log_severity + absl::raw_logging_internal + absl::spinlock_wait + absl::type_traits) + set_property(TARGET absl::base_internal PROPERTY INTERFACE_LINK_LIBRARIES absl::config + absl::type_traits) + set_property(TARGET absl::bind_front + PROPERTY INTERFACE_LINK_LIBRARIES absl::base_internal + absl::compressed_tuple) + set_property(TARGET absl::bits PROPERTY INTERFACE_LINK_LIBRARIES absl::core_headers) + set_property(TARGET absl::btree + PROPERTY INTERFACE_LINK_LIBRARIES + absl::container_common + absl::compare + absl::compressed_tuple + absl::container_memory + absl::cord + absl::core_headers + absl::layout + absl::memory + absl::strings + absl::throw_delegate + absl::type_traits + absl::utility) + set_property(TARGET absl::city PROPERTY INTERFACE_LINK_LIBRARIES absl::config + absl::core_headers absl::endian) + set_property(TARGET absl::cleanup + PROPERTY INTERFACE_LINK_LIBRARIES absl::cleanup_internal absl::config + absl::core_headers) + set_property(TARGET absl::cleanup_internal + PROPERTY INTERFACE_LINK_LIBRARIES absl::base_internal absl::core_headers + absl::utility) + set_property(TARGET absl::compare PROPERTY INTERFACE_LINK_LIBRARIES absl::core_headers + absl::type_traits) + set_property(TARGET absl::compressed_tuple PROPERTY INTERFACE_LINK_LIBRARIES + absl::utility) + set_property(TARGET absl::container_common PROPERTY INTERFACE_LINK_LIBRARIES + absl::type_traits) + set_property(TARGET absl::container_memory + PROPERTY INTERFACE_LINK_LIBRARIES + absl::config + absl::memory + absl::type_traits + absl::utility) + set_property(TARGET absl::cord + PROPERTY INTERFACE_LINK_LIBRARIES + absl::base + absl::config + absl::cord_internal + absl::cordz_functions + absl::cordz_info + absl::cordz_update_scope + absl::cordz_update_tracker + absl::core_headers + absl::endian + absl::fixed_array + 
absl::function_ref + absl::inlined_vector + absl::optional + absl::raw_logging_internal + absl::strings + absl::type_traits) + set_property(TARGET absl::cord_internal + PROPERTY INTERFACE_LINK_LIBRARIES + absl::base_internal + absl::compressed_tuple + absl::config + absl::core_headers + absl::endian + absl::inlined_vector + absl::layout + absl::raw_logging_internal + absl::strings + absl::throw_delegate + absl::type_traits) + set_property(TARGET absl::cordz_functions + PROPERTY INTERFACE_LINK_LIBRARIES + absl::config + absl::core_headers + absl::exponential_biased + absl::raw_logging_internal) + set_property(TARGET absl::cordz_handle + PROPERTY INTERFACE_LINK_LIBRARIES + absl::base + absl::config + absl::raw_logging_internal + absl::synchronization) + set_property(TARGET absl::cordz_info + PROPERTY INTERFACE_LINK_LIBRARIES + absl::base + absl::config + absl::cord_internal + absl::cordz_functions + absl::cordz_handle + absl::cordz_statistics + absl::cordz_update_tracker + absl::core_headers + absl::inlined_vector + absl::span + absl::raw_logging_internal + absl::stacktrace + absl::synchronization) + set_property(TARGET absl::cordz_sample_token + PROPERTY INTERFACE_LINK_LIBRARIES absl::config absl::cordz_handle + absl::cordz_info) + set_property(TARGET absl::cordz_statistics + PROPERTY INTERFACE_LINK_LIBRARIES + absl::config + absl::core_headers + absl::cordz_update_tracker + absl::synchronization) + set_property(TARGET absl::cordz_update_scope + PROPERTY INTERFACE_LINK_LIBRARIES + absl::config + absl::cord_internal + absl::cordz_info + absl::cordz_update_tracker + absl::core_headers) + set_property(TARGET absl::cordz_update_tracker PROPERTY INTERFACE_LINK_LIBRARIES absl::config) - set_property(TARGET absl::debugging PROPERTY INTERFACE_LINK_LIBRARIES - absl::stacktrace absl::leak_check) - set_property(TARGET absl::debugging_internal - PROPERTY INTERFACE_LINK_LIBRARIES - absl::core_headers - absl::config - absl::dynamic_annotations - absl::errno_saver - absl::raw_logging_internal) - set_property(TARGET absl::demangle_internal PROPERTY INTERFACE_LINK_LIBRARIES - absl::base absl::core_headers) - set_property(TARGET absl::dynamic_annotations PROPERTY INTERFACE_LINK_LIBRARIES + set_property(TARGET absl::core_headers PROPERTY INTERFACE_LINK_LIBRARIES absl::config) + set_property(TARGET absl::counting_allocator PROPERTY INTERFACE_LINK_LIBRARIES + absl::config) + set_property(TARGET absl::debugging PROPERTY INTERFACE_LINK_LIBRARIES absl::stacktrace + absl::leak_check) + set_property(TARGET absl::debugging_internal + PROPERTY INTERFACE_LINK_LIBRARIES + absl::core_headers + absl::config + absl::dynamic_annotations + absl::errno_saver + absl::raw_logging_internal) + set_property(TARGET absl::demangle_internal PROPERTY INTERFACE_LINK_LIBRARIES + absl::base absl::core_headers) + set_property(TARGET absl::dynamic_annotations PROPERTY INTERFACE_LINK_LIBRARIES + absl::config) + set_property(TARGET absl::endian PROPERTY INTERFACE_LINK_LIBRARIES absl::base + absl::config absl::core_headers) + set_property(TARGET absl::errno_saver PROPERTY INTERFACE_LINK_LIBRARIES absl::config) + set_property(TARGET absl::examine_stack + PROPERTY INTERFACE_LINK_LIBRARIES + absl::stacktrace + absl::symbolize + absl::config + absl::core_headers + absl::raw_logging_internal) + set_property(TARGET absl::exponential_biased PROPERTY INTERFACE_LINK_LIBRARIES + absl::config absl::core_headers) + set_property(TARGET absl::failure_signal_handler + PROPERTY INTERFACE_LINK_LIBRARIES + absl::examine_stack + absl::stacktrace + absl::base + 
absl::config + absl::core_headers + absl::errno_saver + absl::raw_logging_internal) + set_property(TARGET absl::fast_type_id PROPERTY INTERFACE_LINK_LIBRARIES absl::config) + set_property(TARGET absl::fixed_array + PROPERTY INTERFACE_LINK_LIBRARIES + absl::compressed_tuple + absl::algorithm + absl::config + absl::core_headers + absl::dynamic_annotations + absl::throw_delegate + absl::memory) + set_property(TARGET absl::flags + PROPERTY INTERFACE_LINK_LIBRARIES + absl::config + absl::flags_commandlineflag + absl::flags_config + absl::flags_internal + absl::flags_reflection + absl::base + absl::core_headers + absl::strings) + set_property(TARGET absl::flags_commandlineflag + PROPERTY INTERFACE_LINK_LIBRARIES + absl::config + absl::fast_type_id + absl::flags_commandlineflag_internal + absl::optional + absl::strings) + set_property(TARGET absl::flags_commandlineflag_internal + PROPERTY INTERFACE_LINK_LIBRARIES absl::config absl::fast_type_id) + set_property(TARGET absl::flags_config + PROPERTY INTERFACE_LINK_LIBRARIES + absl::config + absl::flags_path_util + absl::flags_program_name + absl::core_headers + absl::strings + absl::synchronization) + set_property(TARGET absl::flags_internal + PROPERTY INTERFACE_LINK_LIBRARIES + absl::base + absl::config + absl::flags_commandlineflag + absl::flags_commandlineflag_internal + absl::flags_config + absl::flags_marshalling + absl::synchronization + absl::meta + absl::utility) + set_property(TARGET absl::flags_marshalling + PROPERTY INTERFACE_LINK_LIBRARIES + absl::config + absl::core_headers + absl::log_severity + absl::strings + absl::str_format) + set_property(TARGET absl::flags_parse + PROPERTY INTERFACE_LINK_LIBRARIES + absl::config + absl::core_headers + absl::flags_config + absl::flags + absl::flags_commandlineflag + absl::flags_commandlineflag_internal + absl::flags_internal + absl::flags_private_handle_accessor + absl::flags_program_name + absl::flags_reflection + absl::flags_usage + absl::strings + absl::synchronization) + set_property(TARGET absl::flags_path_util PROPERTY INTERFACE_LINK_LIBRARIES + absl::config absl::strings) + set_property(TARGET absl::flags_private_handle_accessor + PROPERTY INTERFACE_LINK_LIBRARIES + absl::config + absl::flags_commandlineflag + absl::flags_commandlineflag_internal + absl::strings) + set_property(TARGET absl::flags_program_name + PROPERTY INTERFACE_LINK_LIBRARIES + absl::config + absl::core_headers + absl::flags_path_util + absl::strings + absl::synchronization) + set_property(TARGET absl::flags_reflection + PROPERTY INTERFACE_LINK_LIBRARIES + absl::config + absl::flags_commandlineflag + absl::flags_private_handle_accessor + absl::flags_config + absl::strings + absl::synchronization + absl::flat_hash_map) + set_property(TARGET absl::flags_usage + PROPERTY INTERFACE_LINK_LIBRARIES + absl::config + absl::core_headers + absl::flags_usage_internal + absl::strings + absl::synchronization) + set_property(TARGET absl::flags_usage_internal + PROPERTY INTERFACE_LINK_LIBRARIES + absl::config + absl::flags_config + absl::flags + absl::flags_commandlineflag + absl::flags_internal + absl::flags_path_util + absl::flags_private_handle_accessor + absl::flags_program_name + absl::flags_reflection + absl::flat_hash_map + absl::strings + absl::synchronization) + set_property(TARGET absl::flat_hash_map + PROPERTY INTERFACE_LINK_LIBRARIES + absl::container_memory + absl::hash_function_defaults + absl::raw_hash_map + absl::algorithm_container + absl::memory) + set_property(TARGET absl::flat_hash_set + PROPERTY 
INTERFACE_LINK_LIBRARIES + absl::container_memory + absl::hash_function_defaults + absl::raw_hash_set + absl::algorithm_container + absl::core_headers + absl::memory) + set_property(TARGET absl::function_ref + PROPERTY INTERFACE_LINK_LIBRARIES absl::base_internal absl::core_headers + absl::meta) + set_property(TARGET absl::graphcycles_internal + PROPERTY INTERFACE_LINK_LIBRARIES + absl::base + absl::base_internal + absl::config + absl::core_headers + absl::malloc_internal + absl::raw_logging_internal) + set_property(TARGET absl::hash + PROPERTY INTERFACE_LINK_LIBRARIES + absl::city + absl::config + absl::core_headers + absl::endian + absl::fixed_array + absl::meta + absl::int128 + absl::strings + absl::optional + absl::variant + absl::utility + absl::low_level_hash) + set_property(TARGET absl::hash_function_defaults + PROPERTY INTERFACE_LINK_LIBRARIES + absl::config + absl::cord + absl::hash + absl::strings) + set_property(TARGET absl::hash_policy_traits PROPERTY INTERFACE_LINK_LIBRARIES + absl::meta) + set_property(TARGET absl::hashtable_debug PROPERTY INTERFACE_LINK_LIBRARIES + absl::hashtable_debug_hooks) + set_property(TARGET absl::hashtable_debug_hooks PROPERTY INTERFACE_LINK_LIBRARIES absl::config) - set_property(TARGET absl::endian PROPERTY INTERFACE_LINK_LIBRARIES absl::base - absl::config absl::core_headers) - set_property(TARGET absl::errno_saver PROPERTY INTERFACE_LINK_LIBRARIES absl::config) - set_property(TARGET absl::examine_stack - PROPERTY INTERFACE_LINK_LIBRARIES - absl::stacktrace - absl::symbolize - absl::config - absl::core_headers - absl::raw_logging_internal) - set_property(TARGET absl::exponential_biased PROPERTY INTERFACE_LINK_LIBRARIES - absl::config absl::core_headers) - set_property(TARGET absl::failure_signal_handler - PROPERTY INTERFACE_LINK_LIBRARIES - absl::examine_stack - absl::stacktrace - absl::base - absl::config - absl::core_headers - absl::errno_saver - absl::raw_logging_internal) - set_property(TARGET absl::fast_type_id PROPERTY INTERFACE_LINK_LIBRARIES absl::config) - set_property(TARGET absl::fixed_array - PROPERTY INTERFACE_LINK_LIBRARIES - absl::compressed_tuple - absl::algorithm - absl::config - absl::core_headers - absl::dynamic_annotations - absl::throw_delegate - absl::memory) - set_property(TARGET absl::flags - PROPERTY INTERFACE_LINK_LIBRARIES - absl::config - absl::flags_commandlineflag - absl::flags_config - absl::flags_internal - absl::flags_reflection - absl::base - absl::core_headers - absl::strings) - set_property(TARGET absl::flags_commandlineflag - PROPERTY INTERFACE_LINK_LIBRARIES - absl::config - absl::fast_type_id - absl::flags_commandlineflag_internal - absl::optional - absl::strings) - set_property(TARGET absl::flags_commandlineflag_internal - PROPERTY INTERFACE_LINK_LIBRARIES absl::config absl::fast_type_id) - set_property(TARGET absl::flags_config - PROPERTY INTERFACE_LINK_LIBRARIES - absl::config - absl::flags_path_util - absl::flags_program_name - absl::core_headers - absl::strings - absl::synchronization) - set_property(TARGET absl::flags_internal - PROPERTY INTERFACE_LINK_LIBRARIES - absl::base - absl::config - absl::flags_commandlineflag - absl::flags_commandlineflag_internal - absl::flags_config - absl::flags_marshalling - absl::synchronization - absl::meta - absl::utility) - set_property(TARGET absl::flags_marshalling - PROPERTY INTERFACE_LINK_LIBRARIES - absl::config - absl::core_headers - absl::log_severity - absl::strings - absl::str_format) - set_property(TARGET absl::flags_parse - PROPERTY INTERFACE_LINK_LIBRARIES - 
absl::config - absl::core_headers - absl::flags_config - absl::flags - absl::flags_commandlineflag - absl::flags_commandlineflag_internal - absl::flags_internal - absl::flags_private_handle_accessor - absl::flags_program_name - absl::flags_reflection - absl::flags_usage - absl::strings - absl::synchronization) - set_property(TARGET absl::flags_path_util PROPERTY INTERFACE_LINK_LIBRARIES - absl::config absl::strings) - set_property(TARGET absl::flags_private_handle_accessor - PROPERTY INTERFACE_LINK_LIBRARIES - absl::config - absl::flags_commandlineflag - absl::flags_commandlineflag_internal - absl::strings) - set_property(TARGET absl::flags_program_name - PROPERTY INTERFACE_LINK_LIBRARIES - absl::config - absl::core_headers - absl::flags_path_util - absl::strings - absl::synchronization) - set_property(TARGET absl::flags_reflection - PROPERTY INTERFACE_LINK_LIBRARIES - absl::config - absl::flags_commandlineflag - absl::flags_private_handle_accessor - absl::flags_config - absl::strings - absl::synchronization - absl::flat_hash_map) - set_property(TARGET absl::flags_usage - PROPERTY INTERFACE_LINK_LIBRARIES - absl::config - absl::core_headers - absl::flags_usage_internal - absl::strings - absl::synchronization) - set_property(TARGET absl::flags_usage_internal - PROPERTY INTERFACE_LINK_LIBRARIES - absl::config - absl::flags_config - absl::flags - absl::flags_commandlineflag - absl::flags_internal - absl::flags_path_util - absl::flags_private_handle_accessor - absl::flags_program_name - absl::flags_reflection - absl::flat_hash_map - absl::strings - absl::synchronization) - set_property(TARGET absl::flat_hash_map - PROPERTY INTERFACE_LINK_LIBRARIES - absl::container_memory - absl::hash_function_defaults - absl::raw_hash_map - absl::algorithm_container - absl::memory) - set_property(TARGET absl::flat_hash_set - PROPERTY INTERFACE_LINK_LIBRARIES - absl::container_memory - absl::hash_function_defaults - absl::raw_hash_set - absl::algorithm_container - absl::core_headers - absl::memory) - set_property(TARGET absl::function_ref - PROPERTY INTERFACE_LINK_LIBRARIES absl::base_internal absl::core_headers - absl::meta) - set_property(TARGET absl::graphcycles_internal - PROPERTY INTERFACE_LINK_LIBRARIES - absl::base - absl::base_internal - absl::config - absl::core_headers - absl::malloc_internal - absl::raw_logging_internal) - set_property(TARGET absl::hash - PROPERTY INTERFACE_LINK_LIBRARIES - absl::city - absl::config - absl::core_headers - absl::endian - absl::fixed_array - absl::meta - absl::int128 - absl::strings - absl::optional - absl::variant - absl::utility - absl::low_level_hash) - set_property(TARGET absl::hash_function_defaults - PROPERTY INTERFACE_LINK_LIBRARIES - absl::config - absl::cord - absl::hash - absl::strings) - set_property(TARGET absl::hash_policy_traits PROPERTY INTERFACE_LINK_LIBRARIES - absl::meta) - set_property(TARGET absl::hashtable_debug PROPERTY INTERFACE_LINK_LIBRARIES - absl::hashtable_debug_hooks) - set_property(TARGET absl::hashtable_debug_hooks PROPERTY INTERFACE_LINK_LIBRARIES - absl::config) - set_property(TARGET absl::hashtablez_sampler - PROPERTY INTERFACE_LINK_LIBRARIES - absl::base - absl::exponential_biased - absl::have_sse - absl::sample_recorder - absl::synchronization) - set_property(TARGET absl::inlined_vector - PROPERTY INTERFACE_LINK_LIBRARIES - absl::algorithm - absl::core_headers - absl::inlined_vector_internal - absl::throw_delegate - absl::memory) - set_property(TARGET absl::inlined_vector_internal - PROPERTY INTERFACE_LINK_LIBRARIES - 
absl::compressed_tuple - absl::core_headers - absl::memory - absl::span - absl::type_traits) - set_property(TARGET absl::int128 PROPERTY INTERFACE_LINK_LIBRARIES absl::config - absl::core_headers absl::bits) - set_property(TARGET absl::kernel_timeout_internal - PROPERTY INTERFACE_LINK_LIBRARIES absl::core_headers - absl::raw_logging_internal absl::time) - set_property(TARGET absl::layout - PROPERTY INTERFACE_LINK_LIBRARIES - absl::config - absl::core_headers - absl::meta - absl::strings - absl::span - absl::utility) - set_property(TARGET absl::leak_check PROPERTY INTERFACE_LINK_LIBRARIES absl::config + set_property(TARGET absl::hashtablez_sampler + PROPERTY INTERFACE_LINK_LIBRARIES + absl::base + absl::exponential_biased + absl::have_sse + absl::sample_recorder + absl::synchronization) + set_property(TARGET absl::inlined_vector + PROPERTY INTERFACE_LINK_LIBRARIES + absl::algorithm + absl::core_headers + absl::inlined_vector_internal + absl::throw_delegate + absl::memory) + set_property(TARGET absl::inlined_vector_internal + PROPERTY INTERFACE_LINK_LIBRARIES + absl::compressed_tuple + absl::core_headers + absl::memory + absl::span + absl::type_traits) + set_property(TARGET absl::int128 PROPERTY INTERFACE_LINK_LIBRARIES absl::config + absl::core_headers absl::bits) + set_property(TARGET absl::kernel_timeout_internal + PROPERTY INTERFACE_LINK_LIBRARIES absl::core_headers + absl::raw_logging_internal absl::time) + set_property(TARGET absl::layout + PROPERTY INTERFACE_LINK_LIBRARIES + absl::config + absl::core_headers + absl::meta + absl::strings + absl::span + absl::utility) + set_property(TARGET absl::leak_check PROPERTY INTERFACE_LINK_LIBRARIES absl::config + absl::core_headers) + set_property(TARGET absl::log_severity PROPERTY INTERFACE_LINK_LIBRARIES absl::core_headers) - set_property(TARGET absl::log_severity PROPERTY INTERFACE_LINK_LIBRARIES - absl::core_headers) - set_property(TARGET absl::low_level_hash - PROPERTY INTERFACE_LINK_LIBRARIES - absl::bits - absl::config - absl::endian - absl::int128) - set_property(TARGET absl::malloc_internal - PROPERTY INTERFACE_LINK_LIBRARIES - absl::base - absl::base_internal - absl::config - absl::core_headers - absl::dynamic_annotations - absl::raw_logging_internal) - set_property(TARGET absl::memory PROPERTY INTERFACE_LINK_LIBRARIES absl::core_headers - absl::meta) - set_property(TARGET absl::meta PROPERTY INTERFACE_LINK_LIBRARIES absl::type_traits) - set_property(TARGET absl::node_hash_map - PROPERTY INTERFACE_LINK_LIBRARIES - absl::container_memory - absl::hash_function_defaults - absl::node_hash_policy - absl::raw_hash_map - absl::algorithm_container - absl::memory) - set_property(TARGET absl::node_hash_policy PROPERTY INTERFACE_LINK_LIBRARIES - absl::config) - set_property(TARGET absl::node_hash_set - PROPERTY INTERFACE_LINK_LIBRARIES - absl::hash_function_defaults - absl::node_hash_policy - absl::raw_hash_set - absl::algorithm_container - absl::memory) - set_property(TARGET absl::numeric PROPERTY INTERFACE_LINK_LIBRARIES absl::int128) - set_property(TARGET absl::numeric_representation PROPERTY INTERFACE_LINK_LIBRARIES - absl::config) - set_property(TARGET absl::optional - PROPERTY INTERFACE_LINK_LIBRARIES - absl::bad_optional_access - absl::base_internal - absl::config - absl::core_headers - absl::memory - absl::type_traits - absl::utility) - set_property(TARGET absl::periodic_sampler - PROPERTY INTERFACE_LINK_LIBRARIES absl::core_headers - absl::exponential_biased) - set_property(TARGET absl::random_bit_gen_ref - PROPERTY 
INTERFACE_LINK_LIBRARIES - absl::core_headers - absl::random_internal_distribution_caller - absl::random_internal_fast_uniform_bits - absl::type_traits) - set_property(TARGET absl::random_distributions - PROPERTY INTERFACE_LINK_LIBRARIES - absl::base_internal - absl::config - absl::core_headers - absl::random_internal_generate_real - absl::random_internal_distribution_caller - absl::random_internal_fast_uniform_bits - absl::random_internal_fastmath - absl::random_internal_iostream_state_saver - absl::random_internal_traits - absl::random_internal_uniform_helper - absl::random_internal_wide_multiply - absl::strings - absl::type_traits) - set_property(TARGET absl::random_internal_distribution_caller - PROPERTY INTERFACE_LINK_LIBRARIES absl::config absl::utility - absl::fast_type_id) - set_property(TARGET absl::random_internal_distribution_test_util - PROPERTY INTERFACE_LINK_LIBRARIES - absl::config - absl::core_headers - absl::raw_logging_internal - absl::strings - absl::str_format - absl::span) - set_property(TARGET absl::random_internal_fast_uniform_bits - PROPERTY INTERFACE_LINK_LIBRARIES absl::config) - set_property(TARGET absl::random_internal_fastmath PROPERTY INTERFACE_LINK_LIBRARIES - absl::bits) - set_property(TARGET absl::random_internal_generate_real - PROPERTY INTERFACE_LINK_LIBRARIES - absl::bits - absl::random_internal_fastmath - absl::random_internal_traits - absl::type_traits) - set_property(TARGET absl::random_internal_iostream_state_saver - PROPERTY INTERFACE_LINK_LIBRARIES absl::int128 absl::type_traits) - set_property(TARGET absl::random_internal_mock_helpers - PROPERTY INTERFACE_LINK_LIBRARIES absl::fast_type_id absl::optional) - set_property(TARGET absl::random_internal_nonsecure_base - PROPERTY INTERFACE_LINK_LIBRARIES - absl::core_headers - absl::optional - absl::random_internal_pool_urbg - absl::random_internal_salted_seed_seq - absl::random_internal_seed_material - absl::span - absl::type_traits) - set_property(TARGET absl::random_internal_pcg_engine - PROPERTY INTERFACE_LINK_LIBRARIES - absl::config - absl::int128 - absl::random_internal_fastmath - absl::random_internal_iostream_state_saver - absl::type_traits) - set_property(TARGET absl::random_internal_platform PROPERTY INTERFACE_LINK_LIBRARIES - absl::config) - set_property(TARGET absl::random_internal_pool_urbg - PROPERTY INTERFACE_LINK_LIBRARIES - absl::base - absl::config - absl::core_headers - absl::endian - absl::random_internal_randen - absl::random_internal_seed_material - absl::random_internal_traits - absl::random_seed_gen_exception - absl::raw_logging_internal - absl::span) - set_property(TARGET absl::random_internal_randen - PROPERTY INTERFACE_LINK_LIBRARIES absl::random_internal_platform - absl::random_internal_randen_hwaes - absl::random_internal_randen_slow) - set_property(TARGET absl::random_internal_randen_engine - PROPERTY INTERFACE_LINK_LIBRARIES - absl::endian - absl::random_internal_iostream_state_saver - absl::random_internal_randen - absl::raw_logging_internal - absl::type_traits) - set_property(TARGET absl::random_internal_randen_hwaes - PROPERTY INTERFACE_LINK_LIBRARIES absl::random_internal_platform - absl::random_internal_randen_hwaes_impl absl::config) - set_property(TARGET absl::random_internal_randen_hwaes_impl - PROPERTY INTERFACE_LINK_LIBRARIES absl::random_internal_platform - absl::config) - set_property(TARGET absl::random_internal_randen_slow - PROPERTY INTERFACE_LINK_LIBRARIES absl::random_internal_platform - absl::config) - set_property(TARGET 
absl::random_internal_salted_seed_seq - PROPERTY INTERFACE_LINK_LIBRARIES - absl::inlined_vector - absl::optional - absl::span - absl::random_internal_seed_material - absl::type_traits) - set_property(TARGET absl::random_internal_seed_material - PROPERTY INTERFACE_LINK_LIBRARIES - absl::core_headers - absl::optional - absl::random_internal_fast_uniform_bits - absl::raw_logging_internal - absl::span - absl::strings) - set_property(TARGET absl::random_internal_traits PROPERTY INTERFACE_LINK_LIBRARIES + set_property(TARGET absl::low_level_hash + PROPERTY INTERFACE_LINK_LIBRARIES + absl::bits + absl::config + absl::endian + absl::int128) + set_property(TARGET absl::malloc_internal + PROPERTY INTERFACE_LINK_LIBRARIES + absl::base + absl::base_internal + absl::config + absl::core_headers + absl::dynamic_annotations + absl::raw_logging_internal) + set_property(TARGET absl::memory PROPERTY INTERFACE_LINK_LIBRARIES absl::core_headers + absl::meta) + set_property(TARGET absl::meta PROPERTY INTERFACE_LINK_LIBRARIES absl::type_traits) + set_property(TARGET absl::node_hash_map + PROPERTY INTERFACE_LINK_LIBRARIES + absl::container_memory + absl::hash_function_defaults + absl::node_hash_policy + absl::raw_hash_map + absl::algorithm_container + absl::memory) + set_property(TARGET absl::node_hash_policy PROPERTY INTERFACE_LINK_LIBRARIES + absl::config) + set_property(TARGET absl::node_hash_set + PROPERTY INTERFACE_LINK_LIBRARIES + absl::hash_function_defaults + absl::node_hash_policy + absl::raw_hash_set + absl::algorithm_container + absl::memory) + set_property(TARGET absl::numeric PROPERTY INTERFACE_LINK_LIBRARIES absl::int128) + set_property(TARGET absl::numeric_representation PROPERTY INTERFACE_LINK_LIBRARIES + absl::config) + set_property(TARGET absl::optional + PROPERTY INTERFACE_LINK_LIBRARIES + absl::bad_optional_access + absl::base_internal + absl::config + absl::core_headers + absl::memory + absl::type_traits + absl::utility) + set_property(TARGET absl::periodic_sampler + PROPERTY INTERFACE_LINK_LIBRARIES absl::core_headers + absl::exponential_biased) + set_property(TARGET absl::random_bit_gen_ref + PROPERTY INTERFACE_LINK_LIBRARIES + absl::core_headers + absl::random_internal_distribution_caller + absl::random_internal_fast_uniform_bits + absl::type_traits) + set_property(TARGET absl::random_distributions + PROPERTY INTERFACE_LINK_LIBRARIES + absl::base_internal + absl::config + absl::core_headers + absl::random_internal_generate_real + absl::random_internal_distribution_caller + absl::random_internal_fast_uniform_bits + absl::random_internal_fastmath + absl::random_internal_iostream_state_saver + absl::random_internal_traits + absl::random_internal_uniform_helper + absl::random_internal_wide_multiply + absl::strings + absl::type_traits) + set_property(TARGET absl::random_internal_distribution_caller + PROPERTY INTERFACE_LINK_LIBRARIES absl::config absl::utility + absl::fast_type_id) + set_property(TARGET absl::random_internal_distribution_test_util + PROPERTY INTERFACE_LINK_LIBRARIES + absl::config + absl::core_headers + absl::raw_logging_internal + absl::strings + absl::str_format + absl::span) + set_property(TARGET absl::random_internal_fast_uniform_bits + PROPERTY INTERFACE_LINK_LIBRARIES absl::config) + set_property(TARGET absl::random_internal_fastmath PROPERTY INTERFACE_LINK_LIBRARIES + absl::bits) + set_property(TARGET absl::random_internal_generate_real + PROPERTY INTERFACE_LINK_LIBRARIES + absl::bits + absl::random_internal_fastmath + absl::random_internal_traits + absl::type_traits) + 
set_property(TARGET absl::random_internal_iostream_state_saver + PROPERTY INTERFACE_LINK_LIBRARIES absl::int128 absl::type_traits) + set_property(TARGET absl::random_internal_mock_helpers + PROPERTY INTERFACE_LINK_LIBRARIES absl::fast_type_id absl::optional) + set_property(TARGET absl::random_internal_nonsecure_base + PROPERTY INTERFACE_LINK_LIBRARIES + absl::core_headers + absl::optional + absl::random_internal_pool_urbg + absl::random_internal_salted_seed_seq + absl::random_internal_seed_material + absl::span + absl::type_traits) + set_property(TARGET absl::random_internal_pcg_engine + PROPERTY INTERFACE_LINK_LIBRARIES + absl::config + absl::int128 + absl::random_internal_fastmath + absl::random_internal_iostream_state_saver + absl::type_traits) + set_property(TARGET absl::random_internal_platform PROPERTY INTERFACE_LINK_LIBRARIES absl::config) - set_property(TARGET absl::random_internal_uniform_helper - PROPERTY INTERFACE_LINK_LIBRARIES absl::config - absl::random_internal_traits absl::type_traits) - set_property(TARGET absl::random_internal_wide_multiply - PROPERTY INTERFACE_LINK_LIBRARIES absl::bits absl::config absl::int128) - set_property(TARGET absl::random_random - PROPERTY INTERFACE_LINK_LIBRARIES - absl::random_distributions - absl::random_internal_nonsecure_base - absl::random_internal_pcg_engine - absl::random_internal_pool_urbg - absl::random_internal_randen_engine - absl::random_seed_sequences) - set_property(TARGET absl::random_seed_gen_exception PROPERTY INTERFACE_LINK_LIBRARIES - absl::config) - set_property(TARGET absl::random_seed_sequences - PROPERTY INTERFACE_LINK_LIBRARIES - absl::inlined_vector - absl::random_internal_nonsecure_base - absl::random_internal_pool_urbg - absl::random_internal_salted_seed_seq - absl::random_internal_seed_material - absl::random_seed_gen_exception - absl::span) - set_property(TARGET absl::raw_hash_map - PROPERTY INTERFACE_LINK_LIBRARIES absl::container_memory - absl::raw_hash_set absl::throw_delegate) - set_property(TARGET absl::raw_hash_set - PROPERTY INTERFACE_LINK_LIBRARIES - absl::bits - absl::compressed_tuple - absl::config - absl::container_common - absl::container_memory - absl::core_headers - absl::endian - absl::hash_policy_traits - absl::hashtable_debug_hooks - absl::have_sse - absl::memory - absl::meta - absl::optional - absl::utility - absl::hashtablez_sampler) - set_property(TARGET absl::raw_logging_internal - PROPERTY INTERFACE_LINK_LIBRARIES - absl::atomic_hook - absl::config - absl::core_headers - absl::log_severity) - set_property(TARGET absl::sample_recorder PROPERTY INTERFACE_LINK_LIBRARIES - absl::base absl::synchronization) - set_property(TARGET absl::scoped_set_env - PROPERTY INTERFACE_LINK_LIBRARIES absl::config - absl::raw_logging_internal) - set_property(TARGET absl::span - PROPERTY INTERFACE_LINK_LIBRARIES - absl::algorithm - absl::core_headers - absl::throw_delegate - absl::type_traits) - set_property(TARGET absl::spinlock_wait - PROPERTY INTERFACE_LINK_LIBRARIES absl::base_internal absl::core_headers - absl::errno_saver) - set_property(TARGET absl::stacktrace - PROPERTY INTERFACE_LINK_LIBRARIES absl::debugging_internal absl::config - absl::core_headers) - set_property(TARGET absl::status - PROPERTY INTERFACE_LINK_LIBRARIES - absl::atomic_hook - absl::config - absl::core_headers - absl::function_ref - absl::raw_logging_internal - absl::inlined_vector - absl::stacktrace - absl::symbolize - absl::strings - absl::cord - absl::str_format - absl::optional) - set_property(TARGET absl::statusor - PROPERTY 
INTERFACE_LINK_LIBRARIES - absl::base - absl::status - absl::core_headers - absl::raw_logging_internal - absl::type_traits - absl::strings - absl::utility - absl::variant) - set_property(TARGET absl::str_format PROPERTY INTERFACE_LINK_LIBRARIES - absl::str_format_internal) - set_property(TARGET absl::str_format_internal - PROPERTY INTERFACE_LINK_LIBRARIES - absl::bits - absl::strings - absl::config - absl::core_headers - absl::numeric_representation - absl::type_traits - absl::int128 - absl::span) - set_property(TARGET absl::strerror PROPERTY INTERFACE_LINK_LIBRARIES absl::config - absl::core_headers absl::errno_saver) - set_property(TARGET absl::strings - PROPERTY INTERFACE_LINK_LIBRARIES - absl::strings_internal - absl::base - absl::bits - absl::config - absl::core_headers - absl::endian - absl::int128 - absl::memory - absl::raw_logging_internal - absl::throw_delegate - absl::type_traits) - set_property(TARGET absl::strings_internal - PROPERTY INTERFACE_LINK_LIBRARIES - absl::config - absl::core_headers - absl::endian - absl::raw_logging_internal - absl::type_traits) - set_property(TARGET absl::symbolize - PROPERTY INTERFACE_LINK_LIBRARIES - absl::debugging_internal - absl::demangle_internal - absl::base - absl::config - absl::core_headers - absl::dynamic_annotations - absl::malloc_internal - absl::raw_logging_internal - absl::strings) - set_property(TARGET absl::synchronization - PROPERTY INTERFACE_LINK_LIBRARIES - absl::graphcycles_internal - absl::kernel_timeout_internal - absl::atomic_hook - absl::base - absl::base_internal - absl::config - absl::core_headers - absl::dynamic_annotations - absl::malloc_internal - absl::raw_logging_internal - absl::stacktrace - absl::symbolize - absl::time) - set_property(TARGET absl::throw_delegate - PROPERTY INTERFACE_LINK_LIBRARIES absl::config - absl::raw_logging_internal) + set_property(TARGET absl::random_internal_pool_urbg + PROPERTY INTERFACE_LINK_LIBRARIES + absl::base + absl::config + absl::core_headers + absl::endian + absl::random_internal_randen + absl::random_internal_seed_material + absl::random_internal_traits + absl::random_seed_gen_exception + absl::raw_logging_internal + absl::span) + set_property(TARGET absl::random_internal_randen + PROPERTY INTERFACE_LINK_LIBRARIES absl::random_internal_platform + absl::random_internal_randen_hwaes + absl::random_internal_randen_slow) + set_property(TARGET absl::random_internal_randen_engine + PROPERTY INTERFACE_LINK_LIBRARIES + absl::endian + absl::random_internal_iostream_state_saver + absl::random_internal_randen + absl::raw_logging_internal + absl::type_traits) + set_property(TARGET absl::random_internal_randen_hwaes + PROPERTY INTERFACE_LINK_LIBRARIES absl::random_internal_platform + absl::random_internal_randen_hwaes_impl absl::config) + set_property(TARGET absl::random_internal_randen_hwaes_impl + PROPERTY INTERFACE_LINK_LIBRARIES absl::random_internal_platform + absl::config) + set_property(TARGET absl::random_internal_randen_slow + PROPERTY INTERFACE_LINK_LIBRARIES absl::random_internal_platform + absl::config) + set_property(TARGET absl::random_internal_salted_seed_seq + PROPERTY INTERFACE_LINK_LIBRARIES + absl::inlined_vector + absl::optional + absl::span + absl::random_internal_seed_material + absl::type_traits) + set_property(TARGET absl::random_internal_seed_material + PROPERTY INTERFACE_LINK_LIBRARIES + absl::core_headers + absl::optional + absl::random_internal_fast_uniform_bits + absl::raw_logging_internal + absl::span + absl::strings) + set_property(TARGET 
absl::random_internal_traits PROPERTY INTERFACE_LINK_LIBRARIES + absl::config) + set_property(TARGET absl::random_internal_uniform_helper + PROPERTY INTERFACE_LINK_LIBRARIES absl::config + absl::random_internal_traits absl::type_traits) + set_property(TARGET absl::random_internal_wide_multiply + PROPERTY INTERFACE_LINK_LIBRARIES absl::bits absl::config absl::int128) + set_property(TARGET absl::random_random + PROPERTY INTERFACE_LINK_LIBRARIES + absl::random_distributions + absl::random_internal_nonsecure_base + absl::random_internal_pcg_engine + absl::random_internal_pool_urbg + absl::random_internal_randen_engine + absl::random_seed_sequences) + set_property(TARGET absl::random_seed_gen_exception PROPERTY INTERFACE_LINK_LIBRARIES + absl::config) + set_property(TARGET absl::random_seed_sequences + PROPERTY INTERFACE_LINK_LIBRARIES + absl::inlined_vector + absl::random_internal_nonsecure_base + absl::random_internal_pool_urbg + absl::random_internal_salted_seed_seq + absl::random_internal_seed_material + absl::random_seed_gen_exception + absl::span) + set_property(TARGET absl::raw_hash_map + PROPERTY INTERFACE_LINK_LIBRARIES absl::container_memory + absl::raw_hash_set absl::throw_delegate) + set_property(TARGET absl::raw_hash_set + PROPERTY INTERFACE_LINK_LIBRARIES + absl::bits + absl::compressed_tuple + absl::config + absl::container_common + absl::container_memory + absl::core_headers + absl::endian + absl::hash_policy_traits + absl::hashtable_debug_hooks + absl::have_sse + absl::memory + absl::meta + absl::optional + absl::utility + absl::hashtablez_sampler) + set_property(TARGET absl::raw_logging_internal + PROPERTY INTERFACE_LINK_LIBRARIES + absl::atomic_hook + absl::config + absl::core_headers + absl::log_severity) + set_property(TARGET absl::sample_recorder PROPERTY INTERFACE_LINK_LIBRARIES absl::base + absl::synchronization) + set_property(TARGET absl::scoped_set_env PROPERTY INTERFACE_LINK_LIBRARIES absl::config + absl::raw_logging_internal) + set_property(TARGET absl::span + PROPERTY INTERFACE_LINK_LIBRARIES + absl::algorithm + absl::core_headers + absl::throw_delegate + absl::type_traits) + set_property(TARGET absl::spinlock_wait + PROPERTY INTERFACE_LINK_LIBRARIES absl::base_internal absl::core_headers + absl::errno_saver) + set_property(TARGET absl::stacktrace + PROPERTY INTERFACE_LINK_LIBRARIES absl::debugging_internal absl::config + absl::core_headers) + set_property(TARGET absl::status + PROPERTY INTERFACE_LINK_LIBRARIES + absl::atomic_hook + absl::config + absl::core_headers + absl::function_ref + absl::raw_logging_internal + absl::inlined_vector + absl::stacktrace + absl::symbolize + absl::strings + absl::cord + absl::str_format + absl::optional) + set_property(TARGET absl::statusor + PROPERTY INTERFACE_LINK_LIBRARIES + absl::base + absl::status + absl::core_headers + absl::raw_logging_internal + absl::type_traits + absl::strings + absl::utility + absl::variant) + set_property(TARGET absl::str_format PROPERTY INTERFACE_LINK_LIBRARIES + absl::str_format_internal) + set_property(TARGET absl::str_format_internal + PROPERTY INTERFACE_LINK_LIBRARIES + absl::bits + absl::strings + absl::config + absl::core_headers + absl::numeric_representation + absl::type_traits + absl::int128 + absl::span) + set_property(TARGET absl::strerror PROPERTY INTERFACE_LINK_LIBRARIES absl::config + absl::core_headers absl::errno_saver) + set_property(TARGET absl::strings + PROPERTY INTERFACE_LINK_LIBRARIES + absl::strings_internal + absl::base + absl::bits + absl::config + absl::core_headers + 
absl::endian + absl::int128 + absl::memory + absl::raw_logging_internal + absl::throw_delegate + absl::type_traits) + set_property(TARGET absl::strings_internal + PROPERTY INTERFACE_LINK_LIBRARIES + absl::config + absl::core_headers + absl::endian + absl::raw_logging_internal + absl::type_traits) + set_property(TARGET absl::symbolize + PROPERTY INTERFACE_LINK_LIBRARIES + absl::debugging_internal + absl::demangle_internal + absl::base + absl::config + absl::core_headers + absl::dynamic_annotations + absl::malloc_internal + absl::raw_logging_internal + absl::strings) + set_property(TARGET absl::synchronization + PROPERTY INTERFACE_LINK_LIBRARIES + absl::graphcycles_internal + absl::kernel_timeout_internal + absl::atomic_hook + absl::base + absl::base_internal + absl::config + absl::core_headers + absl::dynamic_annotations + absl::malloc_internal + absl::raw_logging_internal + absl::stacktrace + absl::symbolize + absl::time) + set_property(TARGET absl::throw_delegate PROPERTY INTERFACE_LINK_LIBRARIES absl::config + absl::raw_logging_internal) + set_property(TARGET absl::time + PROPERTY INTERFACE_LINK_LIBRARIES + absl::base + absl::civil_time + absl::core_headers + absl::int128 + absl::raw_logging_internal + absl::strings + absl::time_zone) + set_property(TARGET absl::type_traits PROPERTY INTERFACE_LINK_LIBRARIES absl::config) + set_property(TARGET absl::utility PROPERTY INTERFACE_LINK_LIBRARIES absl::base_internal + absl::config absl::type_traits) + set_property(TARGET absl::variant + PROPERTY INTERFACE_LINK_LIBRARIES + absl::bad_variant_access + absl::base_internal + absl::config + absl::core_headers + absl::type_traits + absl::utility) + set_property(TARGET absl::wyhash PROPERTY INTERFACE_LINK_LIBRARIES absl::config + absl::endian absl::int128) + + if(APPLE) + # This is due to upstream absl::cctz issue + # https://github.com/abseil/abseil-cpp/issues/283 + find_library(CoreFoundation CoreFoundation) set_property(TARGET absl::time - PROPERTY INTERFACE_LINK_LIBRARIES - absl::base - absl::civil_time - absl::core_headers - absl::int128 - absl::raw_logging_internal - absl::strings - absl::time_zone) - set_property(TARGET absl::type_traits PROPERTY INTERFACE_LINK_LIBRARIES absl::config) - set_property(TARGET absl::utility - PROPERTY INTERFACE_LINK_LIBRARIES absl::base_internal absl::config - absl::type_traits) - set_property(TARGET absl::variant - PROPERTY INTERFACE_LINK_LIBRARIES - absl::bad_variant_access - absl::base_internal - absl::config - absl::core_headers - absl::type_traits - absl::utility) - set_property(TARGET absl::wyhash PROPERTY INTERFACE_LINK_LIBRARIES absl::config - absl::endian absl::int128) - - if(APPLE) - # This is due to upstream absl::cctz issue - # https://github.com/abseil/abseil-cpp/issues/283 - find_library(CoreFoundation CoreFoundation) - set_property(TARGET absl::time - APPEND - PROPERTY INTERFACE_LINK_LIBRARIES ${CoreFoundation}) - endif() + APPEND + PROPERTY INTERFACE_LINK_LIBRARIES ${CoreFoundation}) + endif() - externalproject_add(absl_ep - ${EP_LOG_OPTIONS} - URL ${ABSL_SOURCE_URL} - URL_HASH "SHA256=${ARROW_ABSL_BUILD_SHA256_CHECKSUM}" - CMAKE_ARGS ${ABSL_CMAKE_ARGS} - BUILD_BYPRODUCTS ${ABSL_BUILD_BYPRODUCTS}) + externalproject_add(absl_ep + ${EP_LOG_OPTIONS} + URL ${ABSL_SOURCE_URL} + URL_HASH "SHA256=${ARROW_ABSL_BUILD_SHA256_CHECKSUM}" + CMAKE_ARGS ${ABSL_CMAKE_ARGS} + BUILD_BYPRODUCTS ${ABSL_BUILD_BYPRODUCTS}) - # Work around https://gitlab.kitware.com/cmake/cmake/issues/15052 - file(MAKE_DIRECTORY ${ABSL_INCLUDE_DIR}) + # Work around 
https://gitlab.kitware.com/cmake/cmake/issues/15052 + file(MAKE_DIRECTORY ${ABSL_INCLUDE_DIR}) - set(ABSL_VENDORED TRUE) - endif() + set(ABSL_VENDORED TRUE) endmacro() macro(build_grpc) @@ -3606,9 +3650,7 @@ macro(build_grpc) TRUE PC_PACKAGE_NAMES libcares) - - # First need Abseil - resolve_dependency_absl() + ensure_absl() message(STATUS "Building gRPC from source") @@ -3890,12 +3932,14 @@ macro(build_grpc) list(APPEND ARROW_BUNDLED_STATIC_LIBS - ${GRPC_GPR_ABSL_LIBRARIES} gRPC::address_sorting gRPC::gpr gRPC::grpc gRPC::grpcpp_for_bundling gRPC::upb) + if(ABSL_VENDORED) + list(APPEND ARROW_BUNDLED_STATIC_LIBS ${GRPC_GPR_ABSL_LIBRARIES}) + endif() endmacro() if(ARROW_WITH_GRPC) @@ -3921,7 +3965,9 @@ if(ARROW_WITH_GRPC) # grpc++ headers may reside in ${GRPC_INCLUDE_DIR}/grpc++ or ${GRPC_INCLUDE_DIR}/grpcpp # depending on the gRPC version. get_target_property(GRPC_INCLUDE_DIR gRPC::grpc++ INTERFACE_INCLUDE_DIRECTORIES) - if(EXISTS "${GRPC_INCLUDE_DIR}/grpcpp/impl/codegen/config_protobuf.h") + if(GRPC_INCLUDE_DIR MATCHES "^\\$<" + OR # generator expression + EXISTS "${GRPC_INCLUDE_DIR}/grpcpp/impl/codegen/config_protobuf.h") set(GRPCPP_PP_INCLUDE TRUE) elseif(EXISTS "${GRPC_INCLUDE_DIR}/grpc++/impl/codegen/config_protobuf.h") set(GRPCPP_PP_INCLUDE FALSE) @@ -4012,7 +4058,7 @@ macro(build_google_cloud_cpp_storage) message(STATUS "Only building the google-cloud-cpp::storage component") # List of dependencies taken from https://github.com/googleapis/google-cloud-cpp/blob/master/doc/packaging.md - resolve_dependency_absl() + ensure_absl() build_crc32c_once() # Curl is required on all platforms, but building it internally might also trip over S3's copy. @@ -4260,7 +4306,7 @@ macro(build_orc) INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(ORC_SNAPPY_ROOT "${ORC_SNAPPY_INCLUDE_DIR}" DIRECTORY) - get_target_property(ORC_LZ4_ROOT lz4::lz4 INTERFACE_INCLUDE_DIRECTORIES) + get_target_property(ORC_LZ4_ROOT LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_ROOT}" DIRECTORY) # Weirdly passing in PROTOBUF_LIBRARY for PROTOC_LIBRARY still results in ORC finding @@ -4304,7 +4350,7 @@ macro(build_orc) set(ORC_VENDORED 1) add_dependencies(orc_ep ZLIB::ZLIB) - add_dependencies(orc_ep lz4::lz4) + add_dependencies(orc_ep LZ4::lz4) add_dependencies(orc_ep ${Snappy_TARGET}) add_dependencies(orc_ep ${ARROW_PROTOBUF_LIBPROTOBUF}) @@ -4522,11 +4568,11 @@ macro(build_opentelemetry) foreach(_OPENTELEMETRY_LIB ${_OPENTELEMETRY_LIBS}) add_dependencies(opentelemetry-cpp::${_OPENTELEMETRY_LIB} opentelemetry_ep) + list(APPEND ARROW_BUNDLED_STATIC_LIBS opentelemetry-cpp::${_OPENTELEMETRY_LIB}) endforeach() # Work around https://gitlab.kitware.com/cmake/cmake/issues/15052 file(MAKE_DIRECTORY ${OPENTELEMETRY_INCLUDE_DIR}) - endmacro() if(ARROW_WITH_OPENTELEMETRY) diff --git a/cpp/examples/arrow/CMakeLists.txt b/cpp/examples/arrow/CMakeLists.txt index 0514bf9127510..88b760e3978f2 100644 --- a/cpp/examples/arrow/CMakeLists.txt +++ b/cpp/examples/arrow/CMakeLists.txt @@ -138,4 +138,16 @@ if(ARROW_PARQUET AND ARROW_DATASET) add_arrow_example(udf_example) + if(ARROW_SKYHOOK) + if(ARROW_BUILD_SHARED) + list(APPEND DATASET_EXAMPLES_LINK_LIBS arrow_skyhook_shared) + else() + list(APPEND DATASET_EXAMPLES_LINK_LIBS arrow_skyhook_static) + endif() + + add_arrow_example(dataset_skyhook_scan_example EXTRA_LINK_LIBS + ${DATASET_EXAMPLES_LINK_LIBS}) + add_dependencies(dataset-skyhook-scan-example parquet) + endif() + endif() diff --git a/cpp/examples/arrow/dataset_skyhook_scan_example.cc
b/cpp/examples/arrow/dataset_skyhook_scan_example.cc new file mode 100644 index 0000000000000..2d391723bd09b --- /dev/null +++ b/cpp/examples/arrow/dataset_skyhook_scan_example.cc @@ -0,0 +1,184 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +using arrow::field; +using arrow::int16; +using arrow::Schema; +using arrow::Table; + +namespace fs = arrow::fs; + +namespace ds = arrow::dataset; + +namespace cp = arrow::compute; + +struct Configuration { + // Indicates if the Scanner::ToTable should consume in parallel. + bool use_threads = true; + + // Indicates to the Scan operator which columns are requested. This + // optimization avoid deserializing unneeded columns. + std::vector projected_columns = {"total_amount"}; + + // Indicates the filter by which rows will be filtered. This optimization can + // make use of partition information and/or file metadata if possible. + cp::Expression filter = cp::greater(cp::field_ref("payment_type"), cp::literal(1)); + + ds::InspectOptions inspect_options{}; + ds::FinishOptions finish_options{}; +} kConf; + +arrow::Result> GetDatasetFromDirectory( + std::shared_ptr fs, std::shared_ptr format, + std::string dir) { + // Find all files under `path` + fs::FileSelector s; + s.base_dir = dir; + s.recursive = true; + + // Set partitioning strategy + ds::FileSystemFactoryOptions options; + options.partitioning = std::make_shared( + arrow::schema({arrow::field("payment_type", arrow::int32()), + arrow::field("VendorID", arrow::int32())})); + + // The factory will try to build a dataset. + ARROW_ASSIGN_OR_RAISE(auto factory, + ds::FileSystemDatasetFactory::Make(fs, s, format, options)); + + // Try to infer a common schema for all files. + ARROW_ASSIGN_OR_RAISE(auto schema, factory->Inspect(kConf.inspect_options)); + // Caller can optionally decide another schema as long as it is compatible + // with the previous one, e.g. `factory->Finish(compatible_schema)`. + ARROW_ASSIGN_OR_RAISE(auto dataset, factory->Finish(kConf.finish_options)); + + return dataset; +} + +arrow::Result> GetDatasetFromFile( + std::shared_ptr fs, std::shared_ptr format, + std::string file) { + ds::FileSystemFactoryOptions options; + // The factory will try to build a dataset. + ARROW_ASSIGN_OR_RAISE(auto factory, + ds::FileSystemDatasetFactory::Make(fs, {file}, format, options)); + + // Try to infer a common schema for all files. + ARROW_ASSIGN_OR_RAISE(auto schema, factory->Inspect(kConf.inspect_options)); + // Caller can optionally decide another schema as long as it is compatible + // with the previous one, e.g. `factory->Finish(compatible_schema)`. 
+ ARROW_ASSIGN_OR_RAISE(auto dataset, factory->Finish(kConf.finish_options)); + + return dataset; +} + +arrow::Result> GetDatasetFromPath( + std::shared_ptr fs, std::shared_ptr format, + std::string path) { + ARROW_ASSIGN_OR_RAISE(auto info, fs->GetFileInfo(path)); + if (info.IsDirectory()) { + return GetDatasetFromDirectory(fs, format, path); + } + return GetDatasetFromFile(fs, format, path); +} + +arrow::Result> GetScannerFromDataset( + std::shared_ptr dataset, std::vector columns, + cp::Expression filter, bool use_threads) { + ARROW_ASSIGN_OR_RAISE(auto scanner_builder, dataset->NewScan()); + + if (!columns.empty()) { + ARROW_RETURN_NOT_OK(scanner_builder->Project(columns)); + } + + ARROW_RETURN_NOT_OK(scanner_builder->Filter(filter)); + + ARROW_RETURN_NOT_OK(scanner_builder->UseThreads(use_threads)); + + return scanner_builder->Finish(); +} + +arrow::Result> InstantiateSkyhookFormat() { + // Path to the Ceph configuration file. It contains cluster wide configuration + // and most importantly the connection information to the Ceph cluster. + std::string ceph_config_path = "/etc/ceph/ceph.conf"; + + // Ceph data pool containing the objects to be scanned. + // The default data pool is "cephfs_data". + std::string ceph_data_pool = "cephfs_data"; + + // The user accessing the Ceph cluster. The default username is "client.admin". + std::string ceph_user_name = "client.admin"; + + // Cluster name is an unique identifier for a Ceph cluster. It is especially + // required when you run multiple Ceph clusters on a multi-site architecture + // where the cluster name identifies the Ceph cluster for the + // current session. The default cluster name is "ceph". + std::string ceph_cluster_name = "ceph"; + + // CLS name is used to identify the shared library that needs to be loaded + // in the Ceph OSDs when invoking an object class method. For Skyhook, the + // library name is "libcls_skyhook.so", and the object class name is "skyhook". + std::string ceph_cls_name = "skyhook"; + std::shared_ptr rados_ctx = + std::make_shared(ceph_config_path, ceph_data_pool, + ceph_user_name, ceph_cluster_name, + ceph_cls_name); + ARROW_ASSIGN_OR_RAISE(auto format, + skyhook::SkyhookFileFormat::Make(rados_ctx, "parquet")); + return format; +} + +arrow::Status Main(std::string dataset_root) { + ARROW_ASSIGN_OR_RAISE(auto format, InstantiateSkyhookFormat()); + std::string path; + + ARROW_ASSIGN_OR_RAISE(auto fs, fs::FileSystemFromUri(dataset_root, &path)); + ARROW_ASSIGN_OR_RAISE(auto dataset, GetDatasetFromPath(fs, format, path)); + ARROW_ASSIGN_OR_RAISE( + auto scanner, GetScannerFromDataset(dataset, kConf.projected_columns, kConf.filter, + kConf.use_threads)); + ARROW_ASSIGN_OR_RAISE(auto table, scanner->ToTable()); + std::cout << "Table size: " << table->num_rows() << "\n"; + return arrow::Status::OK(); +} + +int main(int argc, char** argv) { + if (argc != 2) { + // Fake success for CI purposes. 
+ return EXIT_SUCCESS; + } + auto status = Main(argv[1]); + if (!status.ok()) { + std::cerr << status.ToString() << std::endl; + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} diff --git a/cpp/examples/parquet/parquet_arrow/CMakeLists.txt b/cpp/examples/parquet/parquet_arrow/CMakeLists.txt index 32f980060c95a..c89751731575f 100644 --- a/cpp/examples/parquet/parquet_arrow/CMakeLists.txt +++ b/cpp/examples/parquet/parquet_arrow/CMakeLists.txt @@ -24,7 +24,7 @@ include(ExternalProject) include(FindPkgConfig) include(GNUInstallDirs) -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake_modules") +option(PARQUET_LINK_SHARED "Link to the Parquet shared library" ON) # This ensures that things like gnu++11 get passed correctly if(NOT DEFINED CMAKE_CXX_STANDARD) @@ -39,4 +39,8 @@ find_package(Arrow REQUIRED) find_package(Parquet REQUIRED) add_executable(parquet-arrow-example reader_writer.cc) -target_link_libraries(parquet-arrow-example parquet_shared arrow_shared) +if(PARQUET_LINK_SHARED) + target_link_libraries(parquet-arrow-example parquet_shared) +else() + target_link_libraries(parquet-arrow-example parquet_static) +endif() diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 93dd1297bd744..1e30e50a31700 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -395,7 +395,6 @@ if(ARROW_COMPUTE) compute/exec/hash_join.cc compute/exec/hash_join_dict.cc compute/exec/hash_join_node.cc - compute/exec/ir_consumer.cc compute/exec/key_hash.cc compute/exec/key_map.cc compute/exec/order_by_impl.cc @@ -806,10 +805,6 @@ if(ARROW_ORC) add_subdirectory(adapters/orc) endif() -if(ARROW_PYTHON) - add_subdirectory(python) -endif() - if(ARROW_TENSORFLOW) add_subdirectory(adapters/tensorflow) endif() diff --git a/cpp/src/arrow/adapters/orc/CMakeLists.txt b/cpp/src/arrow/adapters/orc/CMakeLists.txt index d7cc6524bc9b7..6b2536bb55503 100644 --- a/cpp/src/arrow/adapters/orc/CMakeLists.txt +++ b/cpp/src/arrow/adapters/orc/CMakeLists.txt @@ -30,7 +30,7 @@ set(ORC_MIN_TEST_LIBS GTest::gtest_main GTest::gtest ${Snappy_TARGET} - lz4::lz4 + LZ4::lz4 ZLIB::ZLIB) if(ARROW_BUILD_STATIC) diff --git a/cpp/src/arrow/adapters/orc/adapter_test.cc b/cpp/src/arrow/adapters/orc/adapter_test.cc index 6914d6b9c1861..1efc02bc404db 100644 --- a/cpp/src/arrow/adapters/orc/adapter_test.cc +++ b/cpp/src/arrow/adapters/orc/adapter_test.cc @@ -173,7 +173,7 @@ void RandWeakComposition(int64_t n, T sum, std::vector* out) { return static_cast(res); }); (*out)[n - 1] += remaining_sum; - std::random_shuffle(out->begin(), out->end()); + std::shuffle(out->begin(), out->end(), gen); } std::shared_ptr GenerateRandomChunkedArray( diff --git a/cpp/src/arrow/builder_benchmark.cc b/cpp/src/arrow/builder_benchmark.cc index c131f81392794..97745d4692e10 100644 --- a/cpp/src/arrow/builder_benchmark.cc +++ b/cpp/src/arrow/builder_benchmark.cc @@ -36,6 +36,7 @@ namespace arrow { using ValueType = int64_t; using VectorType = std::vector; + constexpr int64_t kNumberOfElements = 256 * 512; static VectorType AlmostU8CompressibleVector() { diff --git a/cpp/src/arrow/chunked_array.cc b/cpp/src/arrow/chunked_array.cc index 840dd04a5ad4e..c5e6d7fa4bdf0 100644 --- a/cpp/src/arrow/chunked_array.cc +++ b/cpp/src/arrow/chunked_array.cc @@ -72,7 +72,7 @@ Result> ChunkedArray::Make(ArrayVector chunks, } for (const auto& chunk : chunks) { if (!chunk->type()->Equals(*type)) { - return Status::Invalid("Array chunks must all be same type"); + return Status::TypeError("Array chunks must all be same type"); } } 
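Note on the adapter_test.cc hunk above: std::random_shuffle was deprecated in C++14 and removed in C++17, and its replacement std::shuffle requires an explicit uniform random bit generator, which is why the call now passes the test's engine. A minimal sketch of the replacement pattern, assuming a seeded std::mt19937 comparable to the `gen` used in the test helper (the function name below is hypothetical, not part of the patch):

#include <algorithm>
#include <cstdint>
#include <random>
#include <vector>

// Shuffle in place with an explicit engine; unlike the removed
// std::random_shuffle, results are reproducible under a fixed seed.
void ShuffleForTest(std::vector<int64_t>* out) {
  std::mt19937 gen(42);
  std::shuffle(out->begin(), out->end(), gen);
}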
return std::make_shared(std::move(chunks), std::move(type)); diff --git a/cpp/src/arrow/chunked_array_test.cc b/cpp/src/arrow/chunked_array_test.cc index d1dc69de274b6..08410b4cd5367 100644 --- a/cpp/src/arrow/chunked_array_test.cc +++ b/cpp/src/arrow/chunked_array_test.cc @@ -65,8 +65,8 @@ TEST_F(TestChunkedArray, Make) { ASSERT_OK_AND_ASSIGN(auto result2, ChunkedArray::Make({chunk0, chunk0}, int8())); AssertChunkedEqual(*result, *result2); - ASSERT_RAISES(Invalid, ChunkedArray::Make({chunk0, chunk1})); - ASSERT_RAISES(Invalid, ChunkedArray::Make({chunk0}, int16())); + ASSERT_RAISES(TypeError, ChunkedArray::Make({chunk0, chunk1})); + ASSERT_RAISES(TypeError, ChunkedArray::Make({chunk0}, int16())); } TEST_F(TestChunkedArray, MakeEmpty) { diff --git a/cpp/src/arrow/compute/api.h b/cpp/src/arrow/compute/api.h index 80582e47b7445..3539bab038a2c 100644 --- a/cpp/src/arrow/compute/api.h +++ b/cpp/src/arrow/compute/api.h @@ -28,7 +28,6 @@ #include "arrow/compute/api_scalar.h" // IWYU pragma: export #include "arrow/compute/api_vector.h" // IWYU pragma: export #include "arrow/compute/cast.h" // IWYU pragma: export -#include "arrow/compute/exec.h" // IWYU pragma: export #include "arrow/compute/function.h" // IWYU pragma: export #include "arrow/compute/kernel.h" // IWYU pragma: export #include "arrow/compute/registry.h" // IWYU pragma: export @@ -52,3 +51,9 @@ /// @} #include "arrow/compute/row/grouper.h" // IWYU pragma: export + +/// \defgroup execnode-components Components associated with ExecNode +/// @{ +/// @} + +#include "arrow/compute/exec.h" // IWYU pragma: export diff --git a/cpp/src/arrow/compute/exec.h b/cpp/src/arrow/compute/exec.h index cdd3daf7f74ff..d03b073bb88a0 100644 --- a/cpp/src/arrow/compute/exec.h +++ b/cpp/src/arrow/compute/exec.h @@ -174,6 +174,10 @@ class ARROW_EXPORT SelectionVector { /// TODO: Datum uses arrow/util/variant.h which may be a bit heavier-weight /// than is desirable for this class. Microbenchmarks would help determine for /// sure. See ARROW-8928. 
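As the chunked_array.cc and chunked_array_test.cc hunks above show, ChunkedArray::Make now reports mismatched chunk types (or a chunk that does not match an explicitly requested type) as Status::TypeError instead of Status::Invalid. A caller-side sketch of the migration, not taken from this patch; the builders are only there to make the snippet self-contained:

#include <arrow/api.h>
#include <iostream>

arrow::Status CheckMixedChunks() {
  // Two single-element chunks with different types (int8 vs int16).
  arrow::Int8Builder b8;
  ARROW_RETURN_NOT_OK(b8.Append(1));
  ARROW_ASSIGN_OR_RAISE(auto chunk8, b8.Finish());
  arrow::Int16Builder b16;
  ARROW_RETURN_NOT_OK(b16.Append(1));
  ARROW_ASSIGN_OR_RAISE(auto chunk16, b16.Finish());

  auto maybe_chunked = arrow::ChunkedArray::Make({chunk8, chunk16});
  // Callers that previously tested status().IsInvalid() must now test
  // IsTypeError() to keep catching mixed-type chunk vectors.
  if (!maybe_chunked.ok() && maybe_chunked.status().IsTypeError()) {
    std::cout << maybe_chunked.status().ToString() << std::endl;
  }
  return arrow::Status::OK();
}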
+ +/// \addtogroup execnode-components +/// @{ + struct ARROW_EXPORT ExecBatch { ExecBatch() = default; ExecBatch(std::vector values, int64_t length) @@ -400,6 +404,8 @@ struct ARROW_EXPORT ExecSpan { std::vector values; }; +/// @} + /// \defgroup compute-call-function One-shot calls to compute functions /// /// @{ diff --git a/cpp/src/arrow/compute/exec/CMakeLists.txt b/cpp/src/arrow/compute/exec/CMakeLists.txt index d59cb30986b56..4ce73359d0f1f 100644 --- a/cpp/src/arrow/compute/exec/CMakeLists.txt +++ b/cpp/src/arrow/compute/exec/CMakeLists.txt @@ -78,11 +78,3 @@ if(ARROW_BUILD_OPENMP_BENCHMARKS) PRIVATE "-openmp:experimental -openmp:llvm") endif() endif() - -add_arrow_compute_test(ir_test - PREFIX - "arrow-compute" - EXTRA_LINK_LIBS - ${GFLAGS_LIBRARIES} - TEST_ARGUMENTS - "--computeir_dir=${CMAKE_SOURCE_DIR}/../experimental/computeir") diff --git a/cpp/src/arrow/compute/exec/asof_join_benchmark.cc b/cpp/src/arrow/compute/exec/asof_join_benchmark.cc index af471a501324e..543a4ece575bb 100644 --- a/cpp/src/arrow/compute/exec/asof_join_benchmark.cc +++ b/cpp/src/arrow/compute/exec/asof_join_benchmark.cc @@ -89,7 +89,7 @@ static void TableJoinOverhead(benchmark::State& state, ASSERT_OK_AND_ASSIGN(arrow::compute::ExecNode * join_node, MakeExecNode(factory_name, plan.get(), input_nodes, options)); AsyncGenerator> sink_gen; - MakeExecNode("sink", plan.get(), {join_node}, SinkNodeOptions{&sink_gen}); + ASSERT_OK(MakeExecNode("sink", plan.get(), {join_node}, SinkNodeOptions{&sink_gen})); state.ResumeTiming(); ASSERT_FINISHES_OK(StartAndCollect(plan.get(), sink_gen)); } diff --git a/cpp/src/arrow/compute/exec/exec_plan.h b/cpp/src/arrow/compute/exec/exec_plan.h index 5e52f606a69d5..a07884b2231cf 100644 --- a/cpp/src/arrow/compute/exec/exec_plan.h +++ b/cpp/src/arrow/compute/exec/exec_plan.h @@ -38,6 +38,9 @@ namespace arrow { namespace compute { +/// \addtogroup execnode-components +/// @{ + class ARROW_EXPORT ExecPlan : public std::enable_shared_from_this { public: // This allows operators to rely on signed 16-bit indices @@ -534,5 +537,7 @@ Result>()>> MakeReaderGenerator( std::shared_ptr reader, arrow::internal::Executor* io_executor, int max_q = kDefaultBackgroundMaxQ, int q_restart = kDefaultBackgroundQRestart); +/// @} + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/exec/ir_consumer.cc b/cpp/src/arrow/compute/exec/ir_consumer.cc deleted file mode 100644 index f17dbf1ed7962..0000000000000 --- a/cpp/src/arrow/compute/exec/ir_consumer.cc +++ /dev/null @@ -1,660 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
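The asof_join_benchmark.cc change above wraps the second MakeExecNode call in ASSERT_OK because the function returns an arrow::Result and the error was previously discarded. Outside of GTest-based code the same idea would look roughly like this sketch, assuming it sits in a function returning arrow::Status and reuses the plan, join_node, and sink_gen variables from the benchmark hunk:

// Propagate a failed Result instead of silently dropping it.
ARROW_ASSIGN_OR_RAISE(arrow::compute::ExecNode* sink_node,
                      arrow::compute::MakeExecNode(
                          "sink", plan.get(), {join_node},
                          arrow::compute::SinkNodeOptions{&sink_gen}));
(void)sink_node;  // The sink is driven by the plan; the pointer is not used further.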
- -#include "arrow/compute/exec/ir_consumer.h" - -#include "arrow/array/array_nested.h" -#include "arrow/array/builder_base.h" -#include "arrow/compute/cast.h" -#include "arrow/compute/exec/exec_plan.h" -#include "arrow/compute/exec/expression.h" -#include "arrow/compute/exec/options.h" -#include "arrow/compute/function_internal.h" -#include "arrow/ipc/dictionary.h" -#include "arrow/ipc/metadata_internal.h" -#include "arrow/util/unreachable.h" -#include "arrow/visit_type_inline.h" - -#include "generated/Plan_generated.h" - -namespace arrow { - -using internal::checked_cast; - -namespace compute { - -static inline Status UnexpectedNullField(const char* name) { - return Status::IOError("Unexpected null field ", name, " in flatbuffer-encoded IR"); -} - -Result> Convert(const flatbuf::Field& f) { - std::string name = ipc::internal::StringFromFlatbuffers(f.name()); - - FieldVector fields; - if (auto children = f.children()) { - fields.resize(children->size()); - int i = 0; - for (const flatbuf::Field* child : *children) { - if (child) return UnexpectedNullField("Field.children[i]"); - ARROW_ASSIGN_OR_RAISE(fields[i++], Convert(*child)); - } - } - - if (!f.type()) return UnexpectedNullField("Field.type"); - - std::shared_ptr type; - RETURN_NOT_OK(ipc::internal::ConcreteTypeFromFlatbuffer(f.type_type(), f.type(), - std::move(fields), &type)); - - std::shared_ptr metadata; - RETURN_NOT_OK(ipc::internal::GetKeyValueMetadata(f.custom_metadata(), &metadata)); - - return field(std::move(name), std::move(type), f.nullable(), std::move(metadata)); -} - -std::string LabelFromRelId(const ir::RelId* id) { - return id ? std::to_string(id->id()) : ""; -} - -Result> BufferFromFlatbufferByteVector( - const flatbuffers::Vector* vec) { - if (!vec) return nullptr; - - ARROW_ASSIGN_OR_RAISE(auto buf, AllocateBuffer(vec->size())); - - if (!vec->data()) return UnexpectedNullField("Vector.data"); - std::memcpy(buf->mutable_data(), vec->data(), vec->size()); - - return std::move(buf); -} - -Result Convert(const ir::Literal& lit); - -struct ConvertLiteralImpl { - Result Convert(const BooleanType& t) { return ValueOf(t); } - - Result Convert(const Int8Type& t) { return ValueOf(t); } - Result Convert(const Int16Type& t) { return ValueOf(t); } - Result Convert(const Int32Type& t) { return ValueOf(t); } - Result Convert(const Int64Type& t) { return ValueOf(t); } - - Result Convert(const UInt8Type& t) { return ValueOf(t); } - Result Convert(const UInt16Type& t) { return ValueOf(t); } - Result Convert(const UInt32Type& t) { return ValueOf(t); } - Result Convert(const UInt64Type& t) { return ValueOf(t); } - - Result Convert(const HalfFloatType& t) { return ValueOf(t); } - Result Convert(const FloatType& t) { return ValueOf(t); } - Result Convert(const DoubleType& t) { return ValueOf(t); } - - Result Convert(const Date32Type& t) { return ValueOf(t); } - Result Convert(const Date64Type& t) { return ValueOf(t); } - Result Convert(const Time32Type& t) { return ValueOf(t); } - Result Convert(const Time64Type& t) { return ValueOf(t); } - Result Convert(const DurationType& t) { return ValueOf(t); } - Result Convert(const TimestampType& t) { - return ValueOf(t); - } - - Result Convert(const IntervalType& t) { - ARROW_ASSIGN_OR_RAISE(auto lit, GetLiteral()); - - if (!lit->value()) return UnexpectedNullField("IntervalLiteral.value"); - switch (t.interval_type()) { - case IntervalType::MONTHS: - if (auto value = lit->value_as()) { - return Datum(std::make_shared(value->months())); - } - break; - - case IntervalType::DAY_TIME: - if (auto 
value = lit->value_as()) { - DayTimeIntervalType::DayMilliseconds day_ms{value->days(), - value->milliseconds()}; - return Datum(std::make_shared(day_ms)); - } - break; - - case IntervalType::MONTH_DAY_NANO: - return Status::NotImplemented( - "IntervalLiteral with interval_type=MONTH_DAY_NANO"); - } - - return Status::IOError("IntervalLiteral.type was ", t.ToString(), - " but IntervalLiteral.value had value_type ", - ir::EnumNameIntervalLiteralImpl(lit->value_type())); - } - - Result Convert(const DecimalType& t) { - ARROW_ASSIGN_OR_RAISE(auto lit, GetLiteral()); - - if (!lit->value()) return UnexpectedNullField("DecimalLiteral.value"); - if (static_cast(lit->value()->size()) != t.byte_width()) { - return Status::IOError("DecimalLiteral.type was ", t.ToString(), - " (expected byte width ", t.byte_width(), ")", - " but DecimalLiteral.value had size ", lit->value()->size()); - } - - switch (t.id()) { - case Type::DECIMAL128: { - std::array little_endian; - std::memcpy(little_endian.data(), lit->value(), lit->value()->size()); - Decimal128 value{BasicDecimal128::LittleEndianArray, little_endian}; - return Datum(std::make_shared(value, type_)); - } - - case Type::DECIMAL256: { - std::array little_endian; - std::memcpy(little_endian.data(), lit->value(), lit->value()->size()); - Decimal256 value{BasicDecimal256::LittleEndianArray, little_endian}; - return Datum(std::make_shared(value, type_)); - } - - default: - break; - } - - Unreachable(); - } - - Result Convert(const ListType&) { - ARROW_ASSIGN_OR_RAISE(auto lit, GetLiteral()); - - if (!lit->values()) return UnexpectedNullField("ListLiteral.values"); - ScalarVector values{lit->values()->size()}; - - int i = 0; - for (const ir::Literal* v : *lit->values()) { - if (!v) return UnexpectedNullField("ListLiteral.values[i]"); - ARROW_ASSIGN_OR_RAISE(Datum value, arrow::compute::Convert(*v)); - values[i++] = value.scalar(); - } - - std::unique_ptr builder; - RETURN_NOT_OK(MakeBuilder(default_memory_pool(), type_, &builder)); - RETURN_NOT_OK(builder->AppendScalars(std::move(values))); - ARROW_ASSIGN_OR_RAISE(auto arr, builder->Finish()); - return Datum(std::make_shared(std::move(arr), type_)); - } - - Result Convert(const MapType& t) { - ARROW_ASSIGN_OR_RAISE(auto lit, GetLiteral()); - - if (!lit->values()) return UnexpectedNullField("MapLiteral.values"); - ScalarVector keys{lit->values()->size()}, values{lit->values()->size()}; - - int i = 0; - for (const ir::KeyValue* kv : *lit->values()) { - if (!kv) return UnexpectedNullField("MapLiteral.values[i]"); - ARROW_ASSIGN_OR_RAISE(Datum key, arrow::compute::Convert(*kv->value())); - ARROW_ASSIGN_OR_RAISE(Datum value, arrow::compute::Convert(*kv->value())); - keys[i] = key.scalar(); - values[i] = value.scalar(); - ++i; - } - - ArrayVector kv_arrays(2); - std::unique_ptr builder; - RETURN_NOT_OK(MakeBuilder(default_memory_pool(), t.key_type(), &builder)); - RETURN_NOT_OK(builder->AppendScalars(std::move(keys))); - ARROW_ASSIGN_OR_RAISE(kv_arrays[0], builder->Finish()); - - RETURN_NOT_OK(MakeBuilder(default_memory_pool(), t.value_type(), &builder)); - RETURN_NOT_OK(builder->AppendScalars(std::move(values))); - ARROW_ASSIGN_OR_RAISE(kv_arrays[1], builder->Finish()); - - ARROW_ASSIGN_OR_RAISE(auto item_arr, - StructArray::Make(kv_arrays, t.value_type()->fields())); - return Datum(std::make_shared(std::move(item_arr), type_)); - } - - Result Convert(const StructType& t) { - ARROW_ASSIGN_OR_RAISE(auto lit, GetLiteral()); - if (!lit->values()) return UnexpectedNullField("StructLiteral.values"); - if 
(static_cast(lit->values()->size()) != t.num_fields()) { - return Status::IOError( - "StructLiteral.type was ", t.ToString(), "(expected ", t.num_fields(), - " fields)", " but StructLiteral.values has size ", lit->values()->size()); - } - - ScalarVector values{lit->values()->size()}; - int i = 0; - for (const ir::Literal* v : *lit->values()) { - if (!v) return UnexpectedNullField("StructLiteral.values[i]"); - ARROW_ASSIGN_OR_RAISE(Datum value, arrow::compute::Convert(*v)); - if (!value.type()->Equals(*t.field(i)->type())) { - return Status::IOError("StructLiteral.type was ", t.ToString(), " but value ", i, - " had type ", value.type()->ToString(), "(expected ", - t.field(i)->type()->ToString(), ")"); - } - values[i++] = value.scalar(); - } - - return Datum(std::make_shared(std::move(values), type_)); - } - - Result Convert(const StringType&) { - ARROW_ASSIGN_OR_RAISE(auto lit, GetLiteral()); - if (!lit->value()) return UnexpectedNullField("StringLiteral.value"); - - return Datum(ipc::internal::StringFromFlatbuffers(lit->value())); - } - - Result Convert(const BinaryType&) { - ARROW_ASSIGN_OR_RAISE(auto lit, GetLiteral()); - if (!lit->value()) return UnexpectedNullField("BinaryLiteral.value"); - - ARROW_ASSIGN_OR_RAISE(auto buf, BufferFromFlatbufferByteVector(lit->value())); - return Datum(std::make_shared(std::move(buf))); - } - - Result Convert(const FixedSizeBinaryType& t) { - ARROW_ASSIGN_OR_RAISE(auto lit, GetLiteral()); - if (!lit->value()) return UnexpectedNullField("FixedSizeBinaryLiteral.value"); - - if (static_cast(lit->value()->size()) != t.byte_width()) { - return Status::IOError("FixedSizeBinaryLiteral.type was ", t.ToString(), - " but FixedSizeBinaryLiteral.value had size ", - lit->value()->size()); - } - - ARROW_ASSIGN_OR_RAISE(auto buf, BufferFromFlatbufferByteVector(lit->value())); - return Datum(std::make_shared(std::move(buf), type_)); - } - - Status Visit(const NullType&) { Unreachable(); } - - Status NotImplemented() { - return Status::NotImplemented("Literals of type ", type_->ToString()); - } - Status Visit(const ExtensionType& t) { return NotImplemented(); } - Status Visit(const SparseUnionType& t) { return NotImplemented(); } - Status Visit(const DenseUnionType& t) { return NotImplemented(); } - Status Visit(const FixedSizeListType& t) { return NotImplemented(); } - Status Visit(const DictionaryType& t) { return NotImplemented(); } - Status Visit(const LargeStringType& t) { return NotImplemented(); } - Status Visit(const LargeBinaryType& t) { return NotImplemented(); } - Status Visit(const LargeListType& t) { return NotImplemented(); } - - template - Status Visit(const T& t) { - ARROW_ASSIGN_OR_RAISE(out_, Convert(t)); - return Status::OK(); - } - - template - Result GetLiteral() { - if (const Lit* l = lit_.impl_as()) return l; - - return Status::IOError( - "Literal.type was ", type_->ToString(), " but got ", - ir::EnumNameLiteralImpl(ir::LiteralImplTraits::enum_value), " Literal.impl"); - } - - template ::ScalarType, - typename ValueType = typename ScalarType::ValueType> - Result ValueOf(const T&) { - ARROW_ASSIGN_OR_RAISE(auto lit, GetLiteral()); - auto scalar = - std::make_shared(static_cast(lit->value()), type_); - return Datum(std::move(scalar)); - } - - Datum out_; - const std::shared_ptr& type_; - const ir::Literal& lit_; -}; - -Result Convert(const ir::Literal& lit) { - if (!lit.type()) return UnexpectedNullField("Literal.type"); - if (lit.type()->name()) { - return Status::IOError("Literal.type should have null Field.name"); - } - - 
ARROW_ASSIGN_OR_RAISE(auto field, Convert(*lit.type())); - if (!lit.impl()) return MakeNullScalar(field->type()); - - if (field->type()->id() == Type::NA) { - return Status::IOError("Literal of type null had non-null Literal.impl"); - } - - ConvertLiteralImpl visitor{{}, field->type(), lit}; - RETURN_NOT_OK(VisitTypeInline(*field->type(), &visitor)); - return std::move(visitor.out_); -} - -Result Convert(const ir::FieldRef& ref) { - switch (ref.ref_type()) { - case ir::Deref::StructField: - return FieldRef(ref.ref_as()->position()); - - case ir::Deref::FieldIndex: - return FieldRef(ref.ref_as()->position()); - - case ir::Deref::MapKey: - case ir::Deref::ArraySubscript: - case ir::Deref::ArraySlice: - default: - break; - } - return Status::NotImplemented("Deref::", EnumNameDeref(ref.ref_type())); -} - -Result Convert(const ir::Expression& expr); - -Result, std::vector>> Convert( - const flatbuffers::Vector>& cases) { - std::vector conditions(cases.size()), arguments(cases.size()); - - int i = 0; - for (const ir::CaseFragment* c : cases) { - if (!c) return UnexpectedNullField("Vector[i]"); - ARROW_ASSIGN_OR_RAISE(conditions[i], Convert(*c->match())); - ARROW_ASSIGN_OR_RAISE(arguments[i], Convert(*c->result())); - ++i; - } - - return std::make_pair(std::move(conditions), std::move(arguments)); -} - -Expression CaseWhen(std::vector conditions, std::vector arguments, - Expression default_value) { - arguments.insert(arguments.begin(), call("make_struct", std::move(conditions))); - arguments.push_back(std::move(default_value)); - return call("case_when", std::move(arguments)); -} - -Result Convert(const ir::Expression& expr) { - switch (expr.impl_type()) { - case ir::ExpressionImpl::Literal: { - ARROW_ASSIGN_OR_RAISE(Datum value, Convert(*expr.impl_as())); - return literal(std::move(value)); - } - - case ir::ExpressionImpl::FieldRef: { - ARROW_ASSIGN_OR_RAISE(FieldRef ref, Convert(*expr.impl_as())); - return field_ref(std::move(ref)); - } - - case ir::ExpressionImpl::Call: { - auto call = expr.impl_as(); - - if (!call->name()) return UnexpectedNullField("Call.name"); - auto name = ipc::internal::StringFromFlatbuffers(call->name()); - - if (!call->arguments()) return UnexpectedNullField("Call.arguments"); - std::vector arguments(call->arguments()->size()); - - int i = 0; - for (const ir::Expression* a : *call->arguments()) { - if (!a) return UnexpectedNullField("Call.arguments[i]"); - ARROW_ASSIGN_OR_RAISE(arguments[i++], Convert(*a)); - } - - // What about options...? 
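For reference, the Call case above produced ordinary Arrow compute expressions: the IR function name plus the converted argument expressions were handed to call(), with FunctionOptions left unset (per the comment). A minimal sketch of the expression built for the equal(field 2, literal 42) shape exercised by the deleted tests further below, using the public helpers from arrow/compute/exec/expression.h:

#include "arrow/compute/exec/expression.h"

namespace cp = arrow::compute;

// Sketch only: the expression the deleted Call branch would produce for an IR
// Call named "equal" with a FieldIndex(2) argument and an Int64Literal(42).
cp::Expression ExampleConvertedCall() {
  // call() accepts a function name, converted argument expressions, and an
  // optional FunctionOptions pointer, which the deleted converter never set.
  return cp::call("equal", {cp::field_ref(2), cp::literal(int64_t{42})});
}
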
- return arrow::compute::call(std::move(name), std::move(arguments)); - } - - case ir::ExpressionImpl::Cast: { - auto cast = expr.impl_as(); - - if (!cast->operand()) return UnexpectedNullField("Cast.operand"); - ARROW_ASSIGN_OR_RAISE(Expression arg, Convert(*cast->operand())); - - if (!cast->to()) return UnexpectedNullField("Cast.to"); - ARROW_ASSIGN_OR_RAISE(auto to, Convert(*cast->to())); - - return call("cast", {std::move(arg)}, CastOptions::Safe(to->type())); - } - - case ir::ExpressionImpl::ConditionalCase: { - auto conditional_case = expr.impl_as(); - - if (!conditional_case->conditions()) { - return UnexpectedNullField("ConditionalCase.conditions"); - } - ARROW_ASSIGN_OR_RAISE(auto cases, Convert(*conditional_case->conditions())); - - if (!conditional_case->else_()) return UnexpectedNullField("ConditionalCase.else"); - ARROW_ASSIGN_OR_RAISE(auto default_value, Convert(*conditional_case->else_())); - - return CaseWhen(std::move(cases.first), std::move(cases.second), - std::move(default_value)); - } - - case ir::ExpressionImpl::SimpleCase: { - auto simple_case = expr.impl_as(); - auto expression = simple_case->expression(); - auto matches = simple_case->matches(); - auto else_ = simple_case->else_(); - - if (!expression) return UnexpectedNullField("SimpleCase.expression"); - ARROW_ASSIGN_OR_RAISE(auto rhs, Convert(*expression)); - - if (!matches) return UnexpectedNullField("SimpleCase.matches"); - ARROW_ASSIGN_OR_RAISE(auto cases, Convert(*simple_case->matches())); - - // replace each condition with an equality expression with the rhs - for (auto& condition : cases.first) { - condition = equal(std::move(condition), rhs); - } - - if (!else_) return UnexpectedNullField("SimpleCase.else"); - ARROW_ASSIGN_OR_RAISE(auto default_value, Convert(*simple_case->else_())); - - return CaseWhen(std::move(cases.first), std::move(cases.second), - std::move(default_value)); - } - - case ir::ExpressionImpl::WindowCall: - default: - break; - } - - return Status::NotImplemented("ExpressionImpl::", - EnumNameExpressionImpl(expr.impl_type())); -} - -Result Convert(const ir::Relation& rel) { - switch (rel.impl_type()) { - case ir::RelationImpl::Source: { - auto source = rel.impl_as(); - - if (!source->name()) return UnexpectedNullField("Source.name"); - auto name = ipc::internal::StringFromFlatbuffers(source->name()); - - std::shared_ptr schema; - if (source->schema()) { - ipc::DictionaryMemo ignore; - RETURN_NOT_OK(ipc::internal::GetSchema(source->schema(), &ignore, &schema)); - } - - return Declaration{"catalog_source", - {}, - CatalogSourceNodeOptions{std::move(name), std::move(schema)}, - LabelFromRelId(source->id())}; - } - - case ir::RelationImpl::Filter: { - auto filter = rel.impl_as(); - - if (!filter->predicate()) return UnexpectedNullField("Filter.predicate"); - ARROW_ASSIGN_OR_RAISE(auto predicate, Convert(*filter->predicate())); - - if (!filter->rel()) return UnexpectedNullField("Filter.rel"); - ARROW_ASSIGN_OR_RAISE(auto arg, Convert(*filter->rel()).As()); - - return Declaration{"filter", - {std::move(arg)}, - FilterNodeOptions{std::move(predicate)}, - LabelFromRelId(filter->id())}; - } - - case ir::RelationImpl::Project: { - auto project = rel.impl_as(); - - if (!project->rel()) return UnexpectedNullField("Project.rel"); - ARROW_ASSIGN_OR_RAISE(auto arg, Convert(*project->rel()).As()); - - ProjectNodeOptions opts{{}}; - - if (!project->expressions()) return UnexpectedNullField("Project.expressions"); - for (const ir::Expression* expression : *project->expressions()) { - if (!expression) 
return UnexpectedNullField("Project.expressions[i]"); - ARROW_ASSIGN_OR_RAISE(auto expr, Convert(*expression)); - opts.expressions.push_back(std::move(expr)); - } - - return Declaration{ - "project", {std::move(arg)}, std::move(opts), LabelFromRelId(project->id())}; - } - - case ir::RelationImpl::Aggregate: { - auto aggregate = rel.impl_as(); - - if (!aggregate->rel()) return UnexpectedNullField("Aggregate.rel"); - ARROW_ASSIGN_OR_RAISE(auto arg, - Convert(*aggregate->rel()).As()); - - AggregateNodeOptions opts{{}, {}}; - - if (!aggregate->measures()) return UnexpectedNullField("Aggregate.measures"); - for (const ir::Expression* m : *aggregate->measures()) { - if (!m) return UnexpectedNullField("Aggregate.measures[i]"); - ARROW_ASSIGN_OR_RAISE(auto measure, Convert(*m)); - - auto call = measure.call(); - if (!call || call->arguments.size() != 1) { - return Status::IOError("One of Aggregate.measures was ", measure.ToString(), - " (expected Expression::Call with one argument)"); - } - - auto target = call->arguments.front().field_ref(); - if (!target) { - return Status::NotImplemented( - "Support for non-FieldRef arguments to Aggregate.measures"); - } - - opts.aggregates.push_back({call->function_name, nullptr, *target, - call->function_name + " " + target->ToString()}); - } - - if (!aggregate->groupings()) return UnexpectedNullField("Aggregate.groupings"); - if (aggregate->groupings()->size() > 1) { - return Status::NotImplemented("Support for multiple grouping sets"); - } - - if (aggregate->groupings()->size() == 1) { - if (!aggregate->groupings()->Get(0)) { - return UnexpectedNullField("Aggregate.groupings[0]"); - } - - if (!aggregate->groupings()->Get(0)->keys()) { - return UnexpectedNullField("Grouping.keys"); - } - - for (const ir::Expression* key : *aggregate->groupings()->Get(0)->keys()) { - if (!key) return UnexpectedNullField("Grouping.keys[i]"); - ARROW_ASSIGN_OR_RAISE(auto key_expr, Convert(*key)); - - auto key_ref = key_expr.field_ref(); - if (!key_ref) { - return Status::NotImplemented("Support for non-FieldRef grouping keys"); - } - opts.keys.push_back(*key_ref); - } - } - - return Declaration{"aggregate", - {std::move(arg)}, - std::move(opts), - LabelFromRelId(aggregate->id())}; - } - - case ir::RelationImpl::OrderBy: { - auto order_by = rel.impl_as(); - - if (!order_by->rel()) return UnexpectedNullField("OrderBy.rel"); - ARROW_ASSIGN_OR_RAISE(auto arg, Convert(*order_by->rel()).As()); - - if (!order_by->keys()) return UnexpectedNullField("OrderBy.keys"); - if (order_by->keys()->size() == 0) { - return Status::NotImplemented("Empty sort key list"); - } - - util::optional null_placement; - std::vector sort_keys; - - for (const ir::SortKey* key : *order_by->keys()) { - if (!key) return UnexpectedNullField("OrderBy.keys[i]"); - ARROW_ASSIGN_OR_RAISE(auto expr, Convert(*key->expression())); - - auto target = expr.field_ref(); - if (!target) { - return Status::NotImplemented( - "Support for non-FieldRef expressions in SortKey"); - } - if (target->IsNested()) { - return Status::NotImplemented( - "Support for nested FieldRef expressions in SortKey"); - } - switch (key->ordering()) { - case ir::Ordering::ASCENDING_THEN_NULLS: - case ir::Ordering::NULLS_THEN_ASCENDING: - sort_keys.emplace_back(*target, SortOrder::Ascending); - break; - case ir::Ordering::DESCENDING_THEN_NULLS: - case ir::Ordering::NULLS_THEN_DESCENDING: - sort_keys.emplace_back(*target, SortOrder::Descending); - break; - } - - NullPlacement key_null_placement{}; - switch (key->ordering()) { - case 
ir::Ordering::ASCENDING_THEN_NULLS: - case ir::Ordering::DESCENDING_THEN_NULLS: - key_null_placement = NullPlacement::AtEnd; - break; - case ir::Ordering::NULLS_THEN_ASCENDING: - case ir::Ordering::NULLS_THEN_DESCENDING: - key_null_placement = NullPlacement::AtStart; - break; - } - - if (null_placement && *null_placement != key_null_placement) { - return Status::NotImplemented("Per-key null_placement"); - } - null_placement = key_null_placement; - } - - return Declaration{"order_by_sink", - {std::move(arg)}, - OrderBySinkNodeOptions{ - SortOptions{std::move(sort_keys), *null_placement}, nullptr}, - LabelFromRelId(order_by->id())}; - } - - default: - break; - } - - return Status::NotImplemented("RelationImpl::", EnumNameRelationImpl(rel.impl_type())); -} - -} // namespace compute -} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/ir_consumer.h b/cpp/src/arrow/compute/exec/ir_consumer.h deleted file mode 100644 index 5af27f98f58fe..0000000000000 --- a/cpp/src/arrow/compute/exec/ir_consumer.h +++ /dev/null @@ -1,70 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -#include "arrow/compute/exec/exec_plan.h" -#include "arrow/compute/exec/expression.h" -#include "arrow/compute/exec/options.h" -#include "arrow/datum.h" -#include "arrow/result.h" -#include "arrow/util/visibility.h" - -#include "generated/Plan_generated.h" - -namespace arrow { - -namespace flatbuf = org::apache::arrow::flatbuf; - -namespace compute { - -namespace ir = org::apache::arrow::computeir::flatbuf; - -class ARROW_EXPORT CatalogSourceNodeOptions : public ExecNodeOptions { - public: - CatalogSourceNodeOptions(std::string name, std::shared_ptr schema, - Expression filter = literal(true), - std::vector projection = {}) - : name(std::move(name)), - schema(std::move(schema)), - filter(std::move(filter)), - projection(std::move(projection)) {} - - std::string name; - std::shared_ptr schema; - Expression filter; - std::vector projection; -}; - -ARROW_EXPORT -Result Convert(const ir::Literal& lit); - -ARROW_EXPORT -Result Convert(const ir::Expression& lit); - -ARROW_EXPORT -Result Convert(const ir::Relation& rel); - -template -auto ConvertRoot(const Buffer& buf) -> decltype(Convert(std::declval())) { - return Convert(*flatbuffers::GetRoot(buf.data())); -} - -} // namespace compute -} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/ir_test.cc b/cpp/src/arrow/compute/exec/ir_test.cc deleted file mode 100644 index d7eb37c185e13..0000000000000 --- a/cpp/src/arrow/compute/exec/ir_test.cc +++ /dev/null @@ -1,829 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "arrow/compute/exec/ir_consumer.h" - -#include - -#include - -#include "arrow/compute/exec/options.h" -#include "arrow/compute/exec/test_util.h" -#include "arrow/io/file.h" -#include "arrow/testing/matchers.h" -#include "arrow/util/io_util.h" -#include "arrow/util/string_view.h" - -#include "generated/Plan_generated.h" - -using testing::ElementsAre; -using testing::ElementsAreArray; -using testing::Eq; -using testing::HasSubstr; -using testing::Optional; -using testing::UnorderedElementsAreArray; - -namespace ir = org::apache::arrow::computeir::flatbuf; -namespace flatbuf = org::apache::arrow::flatbuf; - -DEFINE_string(computeir_dir, "", - "Directory containing Flatbuffer schemas for Arrow compute IR.\n" - "This is currently $ARROW_REPO/experimental/computeir/"); - -int main(int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - gflags::ParseCommandLineFlags(&argc, &argv, true); - - if (std::system("flatc --version") != 0) { - std::cout << "flatc not available, skipping tests" << std::endl; - return 0; - } - - int ret = RUN_ALL_TESTS(); - gflags::ShutDownCommandLineFlags(); - return ret; -} - -namespace arrow { -namespace compute { - -std::shared_ptr FlatbufferFromJSON(std::string root_type, - util::string_view json) { - static std::unique_ptr dir; - - if (!dir) { - if (FLAGS_computeir_dir == "") { - std::cout << "Required argument -computeir_dir was not provided!" << std::endl; - std::abort(); - } - - dir = *arrow::internal::TemporaryDir::Make("ir_json_"); - } - - auto json_path = dir->path().ToString() + "ir.json"; - std::ofstream{json_path} << json; - - std::string cmd = "flatc --binary " + FLAGS_computeir_dir + "/Plan.fbs" + - " --root-type org.apache.arrow.computeir.flatbuf." 
+ root_type + " " + - json_path; - - if (int err = std::system(cmd.c_str())) { - std::cerr << cmd << " failed with error code: " << err; - std::abort(); - } - - auto bin = *io::MemoryMappedFile::Open("ir.bin", io::FileMode::READ); - return *bin->Read(*bin->GetSize()); -} - -template -auto ConvertJSON(util::string_view json) -> decltype(Convert(std::declval())) { - std::string root_type; - if (std::is_same::value) { - root_type = "Literal"; - } else if (std::is_same::value) { - root_type = "Expression"; - } else if (std::is_same::value) { - root_type = "Relation"; - } else if (std::is_same::value) { - root_type = "Plan"; - } else { - std::cout << "Unknown Ir class in."; - std::abort(); - } - - auto buf = FlatbufferFromJSON(root_type, json); - return ConvertRoot(*buf); -} - -TEST(Literal, Int64) { - ASSERT_THAT(ConvertJSON(R"({ - type: { - type_type: "Int", - type: { bitWidth: 64, is_signed: true } - } - })"), - ResultWith(DataEq(std::make_shared()))); - - ASSERT_THAT(ConvertJSON(R"({ - type: { - type_type: "Int", - type: { bitWidth: 64, is_signed: true } - }, - impl_type: "Int64Literal", - impl: { value: 42 } - })"), - ResultWith(DataEq(42))); -} - -TEST(Expression, Comparison) { - ASSERT_THAT(ConvertJSON(R"({ - impl_type: "Call", - impl: { - name: "equal", - arguments: [ - { - impl_type: "FieldRef", - impl: { - ref_type: "FieldIndex", - ref: { - position: 2 - } - } - }, - { - impl_type: "Literal", - impl: { - type: { - type_type: "Int", - type: { bitWidth: 64, is_signed: true } - }, - impl_type: "Int64Literal", - impl: { value: 42 } - } - } - ] - } - })"), - ResultWith(Eq(equal(field_ref(2), literal(42))))); -} - -TEST(Relation, Filter) { - ASSERT_THAT( - ConvertJSON(R"({ - impl_type: "Filter", - impl: { - id: { id: 1 }, - rel: { - impl_type: "Source", - impl: { - id: { id: 0 }, - name: "test source", - schema: { - endianness: "Little", - fields: [ - { - name: "i32", - type_type: "Int", - type: { - bitWidth: 32, - is_signed: true - }, - nullable: true - }, - { - name: "f64", - type_type: "FloatingPoint", - type: { - precision: "DOUBLE" - }, - nullable: true - }, - { - name: "i64", - type_type: "Int", - type: { - bitWidth: 64, - is_signed: true - }, - nullable: true - } - ] - } - } - }, - predicate: { - impl_type: "Call", - impl: { - name: "equal", - arguments: [ - { - impl_type: "FieldRef", - impl: { - ref_type: "FieldIndex", - ref: { - position: 2 - } - } - }, - { - impl_type: "Literal", - impl: { - type: { - type_type: "Int", - type: { bitWidth: 64, is_signed: true } - }, - impl_type: "Int64Literal", - impl: { value: 42 } - } - } - ] - } - } - } - })"), - ResultWith(Eq(Declaration::Sequence({ - {"catalog_source", - CatalogSourceNodeOptions{"test source", schema({ - field("i32", int32()), - field("f64", float64()), - field("i64", int64()), - })}, - "0"}, - {"filter", FilterNodeOptions{equal(field_ref(2), literal(42))}, "1"}, - })))); -} - -TEST(Relation, AggregateSimple) { - ASSERT_THAT( - ConvertJSON(R"({ - "impl": { - id: {id: 1}, - "groupings": [ - { - "keys": [ - { - "impl": { - "ref": { - "position": 0 - }, - "ref_type": "FieldIndex", - "relation_index": 0 - }, - "impl_type": "FieldRef" - } - ] - } - ], - "measures": [ - { - "impl": { - "arguments": [ - { - "impl": { - "ref": { - "position": 1 - }, - "ref_type": "FieldIndex", - "relation_index": 0 - }, - "impl_type": "FieldRef" - } - ], - "name": "sum" - }, - "impl_type": "Call" - }, - { - "impl": { - "arguments": [ - { - "impl": { - "ref": { - "position": 2 - }, - "ref_type": "FieldIndex", - "relation_index": 0 - }, - "impl_type": 
"FieldRef" - } - ], - "name": "mean" - }, - "impl_type": "Call" - } - ], - "rel": { - "impl": { - id: {id: 0}, - "name": "tbl", - "schema": { - "endianness": "Little", - "fields": [ - { - "name": "foo", - "nullable": true, - "type": { - "bitWidth": 32, - "is_signed": true - }, - "type_type": "Int" - }, - { - "name": "bar", - "nullable": true, - "type": { - "bitWidth": 64, - "is_signed": true - }, - "type_type": "Int" - }, - { - "name": "baz", - "nullable": true, - "type": { - "precision": "DOUBLE" - }, - "type_type": "FloatingPoint" - } - ] - } - }, - "impl_type": "Source" - } - }, - "impl_type": "Aggregate" -})"), - ResultWith(Eq(Declaration::Sequence({ - {"catalog_source", - CatalogSourceNodeOptions{"tbl", schema({ - field("foo", int32()), - field("bar", int64()), - field("baz", float64()), - })}, - "0"}, - {"aggregate", - AggregateNodeOptions{/*aggregates=*/{ - {"sum", nullptr, 1, "sum FieldRef.FieldPath(1)"}, - {"mean", nullptr, 2, "mean FieldRef.FieldPath(2)"}, - }, - /*keys=*/{0}}, - "1"}, - })))); -} - -TEST(Relation, AggregateWithHaving) { - ASSERT_THAT( - ConvertJSON(R"({ - "impl": { - id: {id: 3}, - "predicate": { - "impl": { - "arguments": [ - { - "impl": { - "ref": { - "position": 0 - }, - "ref_type": "FieldIndex", - "relation_index": 0 - }, - "impl_type": "FieldRef" - }, - { - "impl": { - "impl": { - "value": 10 - }, - "impl_type": "Int8Literal", - "type": { - "nullable": true, - "type": { - "bitWidth": 8, - "is_signed": true - }, - "type_type": "Int" - } - }, - "impl_type": "Literal" - } - ], - "name": "greater" - }, - "impl_type": "Call" - }, - "rel": { - "impl": { - id: {id: 2}, - "groupings": [ - { - "keys": [ - { - "impl": { - "ref": { - "position": 0 - }, - "ref_type": "FieldIndex", - "relation_index": 0 - }, - "impl_type": "FieldRef" - } - ] - } - ], - "measures": [ - { - "impl": { - "arguments": [ - { - "impl": { - "ref": { - "position": 1 - }, - "ref_type": "FieldIndex", - "relation_index": 0 - }, - "impl_type": "FieldRef" - } - ], - "name": "sum" - }, - "impl_type": "Call" - }, - { - "impl": { - "arguments": [ - { - "impl": { - "ref": { - "position": 2 - }, - "ref_type": "FieldIndex", - "relation_index": 0 - }, - "impl_type": "FieldRef" - } - ], - "name": "mean" - }, - "impl_type": "Call" - } - ], - "rel": { - "impl": { - id: {id: 1}, - "predicate": { - "impl": { - "arguments": [ - { - "impl": { - "ref": { - "position": 0 - }, - "ref_type": "FieldIndex", - "relation_index": 0 - }, - "impl_type": "FieldRef" - }, - { - "impl": { - "impl": { - "value": 3 - }, - "impl_type": "Int8Literal", - "type": { - "nullable": true, - "type": { - "bitWidth": 8, - "is_signed": true - }, - "type_type": "Int" - } - }, - "impl_type": "Literal" - } - ], - "name": "less" - }, - "impl_type": "Call" - }, - "rel": { - "impl": { - id: {id: 0}, - "name": "tbl", - "schema": { - "endianness": "Little", - "fields": [ - { - "name": "foo", - "nullable": true, - "type": { - "bitWidth": 32, - "is_signed": true - }, - "type_type": "Int" - }, - { - "name": "bar", - "nullable": true, - "type": { - "bitWidth": 64, - "is_signed": true - }, - "type_type": "Int" - }, - { - "name": "baz", - "nullable": true, - "type": { - "precision": "DOUBLE" - }, - "type_type": "FloatingPoint" - } - ] - } - }, - "impl_type": "Source" - } - }, - "impl_type": "Filter" - } - }, - "impl_type": "Aggregate" - } - }, - "impl_type": "Filter" -})"), - ResultWith(Eq(Declaration::Sequence({ - {"catalog_source", - CatalogSourceNodeOptions{"tbl", schema({ - field("foo", int32()), - field("bar", int64()), - field("baz", float64()), - 
})}, - "0"}, - {"filter", FilterNodeOptions{less(field_ref(0), literal(3))}, "1"}, - {"aggregate", - AggregateNodeOptions{/*aggregates=*/{ - {"sum", nullptr, 1, "sum FieldRef.FieldPath(1)"}, - {"mean", nullptr, 2, "mean FieldRef.FieldPath(2)"}, - }, - /*keys=*/{0}}, - "2"}, - {"filter", FilterNodeOptions{greater(field_ref(0), literal(10))}, "3"}, - })))); -} - -TEST(Relation, ProjectionWithFilter) { - ASSERT_THAT( - ConvertJSON(R"({ - "impl": { - id: {id:2}, - "predicate": { - "impl": { - "arguments": [ - { - "impl": { - "ref": { - "position": 0 - }, - "ref_type": "FieldIndex", - "relation_index": 0 - }, - "impl_type": "FieldRef" - }, - { - "impl": { - "impl": { - "value": 3 - }, - "impl_type": "Int8Literal", - "type": { - "nullable": true, - "type": { - "bitWidth": 8, - "is_signed": true - }, - "type_type": "Int" - } - }, - "impl_type": "Literal" - } - ], - "name": "less" - }, - "impl_type": "Call" - }, - "rel": { - "impl": { - id: {id:1}, - "expressions": [ - { - "impl": { - "ref": { - "position": 1 - }, - "ref_type": "FieldIndex", - "relation_index": 0 - }, - "impl_type": "FieldRef" - }, - { - "impl": { - "ref": { - "position": 2 - }, - "ref_type": "FieldIndex", - "relation_index": 0 - }, - "impl_type": "FieldRef" - } - ], - "rel": { - "impl": { - id: {id:0}, - "name": "tbl", - "schema": { - "endianness": "Little", - "fields": [ - { - "name": "foo", - "nullable": true, - "type": { - "bitWidth": 32, - "is_signed": true - }, - "type_type": "Int" - }, - { - "name": "bar", - "nullable": true, - "type": { - "bitWidth": 64, - "is_signed": true - }, - "type_type": "Int" - }, - { - "name": "baz", - "nullable": true, - "type": { - "precision": "DOUBLE" - }, - "type_type": "FloatingPoint" - } - ] - } - }, - "impl_type": "Source" - } - }, - "impl_type": "Project" - } - }, - "impl_type": "Filter" -})"), - ResultWith(Eq(Declaration::Sequence({ - {"catalog_source", - CatalogSourceNodeOptions{"tbl", schema({ - field("foo", int32()), - field("bar", int64()), - field("baz", float64()), - })}, - "0"}, - {"project", ProjectNodeOptions{/*expressions=*/{field_ref(1), field_ref(2)}}, - "1"}, - {"filter", FilterNodeOptions{less(field_ref(0), literal(3))}, "2"}, - })))); -} - -TEST(Relation, ProjectionWithSort) { - ASSERT_THAT( - ConvertJSON(R"({ - "impl": { - id: {id:2}, - "keys": [ - { - "expression": { - "impl": { - "ref": { - "position": 0 - }, - "ref_type": "FieldIndex", - "relation_index": 0 - }, - "impl_type": "FieldRef" - }, - "ordering": "NULLS_THEN_ASCENDING" - }, - { - "expression": { - "impl": { - "ref": { - "position": 1 - }, - "ref_type": "FieldIndex", - "relation_index": 0 - }, - "impl_type": "FieldRef" - }, - "ordering": "NULLS_THEN_DESCENDING" - } - ], - "rel": { - "impl": { - id: {id:1}, - "expressions": [ - { - "impl": { - "ref": { - "position": 0 - }, - "ref_type": "FieldIndex", - "relation_index": 0 - }, - "impl_type": "FieldRef" - }, - { - "impl": { - "ref": { - "position": 1 - }, - "ref_type": "FieldIndex", - "relation_index": 0 - }, - "impl_type": "FieldRef" - }, - { - "impl": { - "ref": { - "position": 2 - }, - "ref_type": "FieldIndex", - "relation_index": 0 - }, - "impl_type": "FieldRef" - } - ], - "rel": { - "impl": { - id: {id: 0}, - "name": "tbl", - "schema": { - "endianness": "Little", - "fields": [ - { - "name": "foo", - "nullable": true, - "type": { - "bitWidth": 32, - "is_signed": true - }, - "type_type": "Int" - }, - { - "name": "bar", - "nullable": true, - "type": { - "bitWidth": 64, - "is_signed": true - }, - "type_type": "Int" - }, - { - "name": "baz", - "nullable": true, - 
"type": { - "precision": "DOUBLE" - }, - "type_type": "FloatingPoint" - } - ] - } - }, - "impl_type": "Source" - } - }, - "impl_type": "Project" - } - }, - "impl_type": "OrderBy" -})"), - ResultWith(Eq(Declaration::Sequence({ - {"catalog_source", - CatalogSourceNodeOptions{"tbl", schema({ - field("foo", int32()), - field("bar", int64()), - field("baz", float64()), - })}, - "0"}, - {"project", - ProjectNodeOptions{/*expressions=*/{field_ref(0), field_ref(1), field_ref(2)}}, - "1"}, - {"order_by_sink", - OrderBySinkNodeOptions{SortOptions{{ - SortKey{0, SortOrder::Ascending}, - SortKey{1, SortOrder::Descending}, - }, - NullPlacement::AtStart}, - nullptr}, - "2"}, - })))); -} - -} // namespace compute -} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/options.cc b/cpp/src/arrow/compute/exec/options.cc index c09ab1c1b68c2..ef1a0c7e2eb2a 100644 --- a/cpp/src/arrow/compute/exec/options.cc +++ b/cpp/src/arrow/compute/exec/options.cc @@ -25,6 +25,8 @@ namespace arrow { namespace compute { +constexpr int64_t TableSourceNodeOptions::kDefaultMaxBatchSize; + std::string ToString(JoinType t) { switch (t) { case JoinType::LEFT_SEMI: diff --git a/cpp/src/arrow/compute/exec/options.h b/cpp/src/arrow/compute/exec/options.h index 4a0cd602efb54..a8e8c1ee23096 100644 --- a/cpp/src/arrow/compute/exec/options.h +++ b/cpp/src/arrow/compute/exec/options.h @@ -64,7 +64,9 @@ class ARROW_EXPORT SourceNodeOptions : public ExecNodeOptions { /// \brief An extended Source node which accepts a table class ARROW_EXPORT TableSourceNodeOptions : public ExecNodeOptions { public: - TableSourceNodeOptions(std::shared_ptr table, int64_t max_batch_size) + static constexpr int64_t kDefaultMaxBatchSize = 1 << 20; + TableSourceNodeOptions(std::shared_ptr
table,
+                         int64_t max_batch_size = kDefaultMaxBatchSize)
       : table(table), max_batch_size(max_batch_size) {}
 
   // arrow table which acts as the data source
diff --git a/cpp/src/arrow/compute/exec/sink_node.cc b/cpp/src/arrow/compute/exec/sink_node.cc
index a1426265cf94f..8af4e8e996cce 100644
--- a/cpp/src/arrow/compute/exec/sink_node.cc
+++ b/cpp/src/arrow/compute/exec/sink_node.cc
@@ -388,47 +388,6 @@ class ConsumingSinkNode : public ExecNode, public BackpressureControl {
   std::vector<std::string> names_;
   int32_t backpressure_counter_ = 0;
 };
-
-/**
- * @brief This node is an extension on ConsumingSinkNode
- * to facilitate to get the output from an execution plan
- * as a table. We define a custom SinkNodeConsumer to
- * enable this functionality.
- */
-
-struct TableSinkNodeConsumer : public SinkNodeConsumer {
- public:
-  TableSinkNodeConsumer(std::shared_ptr<Table>* out, MemoryPool* pool)
-      : out_(out), pool_(pool) {}
-
-  Status Init(const std::shared_ptr<Schema>& schema,
-              BackpressureControl* backpressure_control) override {
-    // If the user is collecting into a table then backpressure is meaningless
-    ARROW_UNUSED(backpressure_control);
-    schema_ = schema;
-    return Status::OK();
-  }
-
-  Status Consume(ExecBatch batch) override {
-    std::lock_guard<std::mutex> guard(consume_mutex_);
-    ARROW_ASSIGN_OR_RAISE(auto rb, batch.ToRecordBatch(schema_, pool_));
-    batches_.push_back(rb);
-    return Status::OK();
-  }
-
-  Future<> Finish() override {
-    ARROW_ASSIGN_OR_RAISE(*out_, Table::FromRecordBatches(batches_));
-    return Status::OK();
-  }
-
- private:
-  std::shared_ptr<Table>
* out_; - MemoryPool* pool_; - std::shared_ptr schema_; - std::vector> batches_; - std::mutex consume_mutex_; -}; - static Result MakeTableConsumingSinkNode( compute::ExecPlan* plan, std::vector inputs, const compute::ExecNodeOptions& options) { diff --git a/cpp/src/arrow/compute/exec/test_util.cc b/cpp/src/arrow/compute/exec/test_util.cc index 7ea515c1c5135..cc26143179a36 100644 --- a/cpp/src/arrow/compute/exec/test_util.cc +++ b/cpp/src/arrow/compute/exec/test_util.cc @@ -33,7 +33,6 @@ #include "arrow/compute/api_vector.h" #include "arrow/compute/exec.h" #include "arrow/compute/exec/exec_plan.h" -#include "arrow/compute/exec/ir_consumer.h" #include "arrow/compute/exec/options.h" #include "arrow/compute/exec/util.h" #include "arrow/compute/function_internal.h" @@ -308,18 +307,6 @@ bool operator==(const Declaration& l, const Declaration& r) { if (l.inputs != r.inputs) return false; if (l.label != r.label) return false; - if (l.factory_name == "catalog_source") { - auto l_opts = &OptionsAs(l); - auto r_opts = &OptionsAs(r); - - bool schemas_equal = l_opts->schema == nullptr - ? r_opts->schema == nullptr - : l_opts->schema->Equals(r_opts->schema); - - return l_opts->name == r_opts->name && schemas_equal && - l_opts->filter == r_opts->filter && l_opts->projection == r_opts->projection; - } - if (l.factory_name == "filter") { return OptionsAs(l).filter_expression == OptionsAs(r).filter_expression; @@ -367,22 +354,6 @@ bool operator==(const Declaration& l, const Declaration& r) { static inline void PrintToImpl(const std::string& factory_name, const ExecNodeOptions& opts, std::ostream* os) { - if (factory_name == "catalog_source") { - auto o = &OptionsAs(opts); - *os << o->name << ", schema=" << o->schema->ToString(); - if (o->filter != literal(true)) { - *os << ", filter=" << o->filter.ToString(); - } - if (!o->projection.empty()) { - *os << ", projection=["; - for (const auto& ref : o->projection) { - *os << ref.ToString() << ","; - } - *os << "]"; - } - return; - } - if (factory_name == "filter") { return PrintTo(OptionsAs(opts).filter_expression, os); } diff --git a/cpp/src/arrow/compute/exec/util.cc b/cpp/src/arrow/compute/exec/util.cc index ae70cfcd46f50..a34a9c6271322 100644 --- a/cpp/src/arrow/compute/exec/util.cc +++ b/cpp/src/arrow/compute/exec/util.cc @@ -383,5 +383,25 @@ size_t ThreadIndexer::Check(size_t thread_index) { return thread_index; } +Status TableSinkNodeConsumer::Init(const std::shared_ptr& schema, + BackpressureControl* backpressure_control) { + // If the user is collecting into a table then backpressure is meaningless + ARROW_UNUSED(backpressure_control); + schema_ = schema; + return Status::OK(); +} + +Status TableSinkNodeConsumer::Consume(ExecBatch batch) { + auto guard = consume_mutex_.Lock(); + ARROW_ASSIGN_OR_RAISE(auto rb, batch.ToRecordBatch(schema_, pool_)); + batches_.push_back(std::move(rb)); + return Status::OK(); +} + +Future<> TableSinkNodeConsumer::Finish() { + ARROW_ASSIGN_OR_RAISE(*out_, Table::FromRecordBatches(batches_)); + return Status::OK(); +} + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/exec/util.h b/cpp/src/arrow/compute/exec/util.h index 30526cb835ab1..7e716808fa008 100644 --- a/cpp/src/arrow/compute/exec/util.h +++ b/cpp/src/arrow/compute/exec/util.h @@ -24,6 +24,7 @@ #include #include "arrow/buffer.h" +#include "arrow/compute/exec/options.h" #include "arrow/compute/type_fwd.h" #include "arrow/memory_pool.h" #include "arrow/result.h" @@ -342,5 +343,23 @@ class TailSkipForSIMD { } }; +/// \brief A consumer that 
collects results into an in-memory table
+struct ARROW_EXPORT TableSinkNodeConsumer : public SinkNodeConsumer {
+ public:
+  TableSinkNodeConsumer(std::shared_ptr<Table>* out, MemoryPool* pool)
+      : out_(out), pool_(pool) {}
+  Status Init(const std::shared_ptr<Schema>& schema,
+              BackpressureControl* backpressure_control) override;
+  Status Consume(ExecBatch batch) override;
+  Future<> Finish() override;
+
+ private:
+  std::shared_ptr<Table>
* out_; + MemoryPool* pool_; + std::shared_ptr schema_; + std::vector> batches_; + util::Mutex consume_mutex_; +}; + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc index c7909487fb3e6..8f400b2d249a2 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_test.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc @@ -2532,6 +2532,7 @@ template void CheckModes(const Datum& array, const ModeOptions options, const std::vector& expected_modes, const std::vector& expected_counts) { + ARROW_SCOPED_TRACE("Mode Options: ", options.ToString()); ASSERT_OK_AND_ASSIGN(Datum out, Mode(array, options)); ValidateOutput(out); const StructArray out_array(out.array()); @@ -2543,7 +2544,9 @@ void CheckModes(const Datum& array, const ModeOptions options, for (int i = 0; i < out_array.length(); ++i) { // equal or nan equal ASSERT_TRUE((expected_modes[i] == out_modes[i]) || - (expected_modes[i] != expected_modes[i] && out_modes[i] != out_modes[i])); + (expected_modes[i] != expected_modes[i] && out_modes[i] != out_modes[i])) + << " Actual Value: " << out_modes[i] << "\n" + << "Expected Value: " << expected_modes[i]; ASSERT_EQ(expected_counts[i], out_counts[i]); } } @@ -2552,6 +2555,7 @@ template <> void CheckModes(const Datum& array, const ModeOptions options, const std::vector& expected_modes, const std::vector& expected_counts) { + ARROW_SCOPED_TRACE("Mode Options: ", options.ToString()); ASSERT_OK_AND_ASSIGN(Datum out, Mode(array, options)); ValidateOutput(out); const StructArray out_array(out.array()); @@ -2561,7 +2565,7 @@ void CheckModes(const Datum& array, const ModeOptions options, const uint8_t* out_modes = out_array.field(0)->data()->GetValues(1); const int64_t* out_counts = out_array.field(1)->data()->GetValues(1); for (int i = 0; i < out_array.length(); ++i) { - ASSERT_TRUE(expected_modes[i] == bit_util::GetBit(out_modes, i)); + ASSERT_EQ(expected_modes[i], bit_util::GetBit(out_modes, i)); ASSERT_EQ(expected_counts[i], out_counts[i]); } } @@ -3337,6 +3341,7 @@ class TestPrimitiveQuantileKernel : public ::testing::Test { for (size_t i = 0; i < this->interpolations_.size(); ++i) { options.interpolation = this->interpolations_[i]; + ARROW_SCOPED_TRACE("Quantile Options: ", options.ToString()); ASSERT_OK_AND_ASSIGN(Datum out, Quantile(array, options)); const auto& out_array = out.make_array(); @@ -3351,7 +3356,9 @@ class TestPrimitiveQuantileKernel : public ::testing::Test { const auto& numeric_scalar = checked_pointer_cast(expected[j][i].scalar()); ASSERT_TRUE((quantiles[j] == numeric_scalar->value) || - (std::isnan(quantiles[j]) && std::isnan(numeric_scalar->value))); + (std::isnan(quantiles[j]) && std::isnan(numeric_scalar->value))) + << " Actual Value: " << quantiles[j] << "\n" + << "Expected Value: " << numeric_scalar->value; } } else { AssertTypeEqual(out_array->type(), type_singleton()); diff --git a/cpp/src/arrow/compute/kernels/scalar_boolean.cc b/cpp/src/arrow/compute/kernels/scalar_boolean.cc index 943ca4c2dc2ff..fb23106b6bc97 100644 --- a/cpp/src/arrow/compute/kernels/scalar_boolean.cc +++ b/cpp/src/arrow/compute/kernels/scalar_boolean.cc @@ -451,11 +451,11 @@ const FunctionDoc and_kleene_doc{ const FunctionDoc and_not_kleene_doc{ "Logical 'and not' boolean values (Kleene logic)", ("This function behaves as follows with nulls:\n\n" - "- true and null = null\n" - "- null and false = null\n" - "- false and null = false\n" - "- null and true = false\n" - "- null and null = null\n" + "- true 
and not null = null\n" + "- null and not false = null\n" + "- false and not null = false\n" + "- null and not true = false\n" + "- null and not null = null\n" "\n" "In other words, in this context a null value really means \"unknown\",\n" "and an unknown value 'and not' true is always false, as is false\n" @@ -467,14 +467,14 @@ const FunctionDoc or_kleene_doc{ "Logical 'or' boolean values (Kleene logic)", ("This function behaves as follows with nulls:\n\n" "- true or null = true\n" - "- null and true = true\n" - "- false and null = null\n" - "- null and false = null\n" - "- null and null = null\n" + "- null or true = true\n" + "- false or null = null\n" + "- null or false = null\n" + "- null or null = null\n" "\n" "In other words, in this context a null value really means \"unknown\",\n" "and an unknown value 'or' true is always true.\n" - "For a different null behavior, see function \"and\"."), + "For a different null behavior, see function \"or\"."), {"x", "y"}}; } // namespace diff --git a/cpp/src/arrow/csv/converter_test.cc b/cpp/src/arrow/csv/converter_test.cc index 0c5def554b0c0..c32b07d2de40c 100644 --- a/cpp/src/arrow/csv/converter_test.cc +++ b/cpp/src/arrow/csv/converter_test.cc @@ -624,21 +624,29 @@ TEST(TimestampConversion, UserDefinedParsers) { {{86400000}, {172800000}}, options); } -#ifndef _WIN32 TEST(TimestampConversion, UserDefinedParsersWithZone) { auto options = ConvertOptions::Defaults(); auto type = timestamp(TimeUnit::SECOND, "America/Phoenix"); // Test a single parser options.timestamp_parsers = {TimestampParser::MakeStrptime("%m/%d/%Y %z")}; - AssertConversion(type, {"01/02/1970 +0000,01/03/1970 +0000\n"}, - {{86400}, {172800}}, options); + if (internal::kStrptimeSupportsZone) { + AssertConversion( + type, {"01/02/1970 +0000,01/03/1970 +0000\n"}, {{86400}, {172800}}, options); + } else { + AssertConversionError(type, {"01/02/1970 +0000,01/03/1970 +0000\n"}, {0, 1}, options); + } // Test multiple parsers options.timestamp_parsers.push_back(TimestampParser::MakeISO8601()); - AssertConversion( - type, {"01/02/1970 +0000,1970-01-03T00:00:00+0000\n"}, {{86400}, {172800}}, - options); + if (internal::kStrptimeSupportsZone) { + AssertConversion( + type, {"01/02/1970 +0000,1970-01-03T00:00:00+0000\n"}, {{86400}, {172800}}, + options); + } else { + AssertConversionError(type, {"01/02/1970 +0000,1970-01-03T00:00:00+0000\n"}, {0}, + options); + } // Test errors options.timestamp_parsers = {TimestampParser::MakeStrptime("%m/%d/%Y")}; @@ -646,25 +654,6 @@ TEST(TimestampConversion, UserDefinedParsersWithZone) { options.timestamp_parsers.push_back(TimestampParser::MakeISO8601()); AssertConversionError(type, {"01/02/1970,1970-01-03T00:00:00+0000\n"}, {0}, options); } -#else -// Windows uses the vendored musl strptime which doesn't support %z. 
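The replacement hunk above folds the old #ifndef _WIN32 / #else pair (the deleted Windows-only variant follows) into one test that branches on arrow::internal::kStrptimeSupportsZone at runtime. A condensed sketch of that guard, assuming the templated AssertConversion<TimestampType, int64_t> helper used elsewhere in converter_test.cc:

// Sketch of the runtime guard that replaces the compile-time #ifdef split.
auto options = ConvertOptions::Defaults();
auto type = timestamp(TimeUnit::SECOND, "America/Phoenix");
options.timestamp_parsers = {TimestampParser::MakeStrptime("%m/%d/%Y %z")};

if (internal::kStrptimeSupportsZone) {
  // Platforms whose strptime honors %z parse both cells successfully.
  AssertConversion<TimestampType, int64_t>(
      type, {"01/02/1970 +0000,01/03/1970 +0000\n"}, {{86400}, {172800}}, options);
} else {
  // The vendored musl strptime ignores %z, so both columns must report errors.
  AssertConversionError(type, {"01/02/1970 +0000,01/03/1970 +0000\n"}, {0, 1}, options);
}
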
-TEST(TimestampConversion, UserDefinedParsersWithZone) { - auto options = ConvertOptions::Defaults(); - auto type = timestamp(TimeUnit::SECOND, "America/Phoenix"); - - options.timestamp_parsers = {TimestampParser::MakeStrptime("%m/%d/%Y %z")}; - AssertConversionError(type, {"01/02/1970 +0000,01/03/1970 +0000\n"}, {0, 1}, options); - - options.timestamp_parsers.push_back(TimestampParser::MakeISO8601()); - AssertConversionError(type, {"01/02/1970 +0000,1970-01-03T00:00:00+0000\n"}, {0}, - options); - - options.timestamp_parsers = {TimestampParser::MakeStrptime("%m/%d/%Y")}; - AssertConversionError(type, {"01/02/1970,01/03/1970\n"}, {0, 1}, options); - options.timestamp_parsers.push_back(TimestampParser::MakeISO8601()); - AssertConversionError(type, {"01/02/1970,1970-01-03T00:00:00+0000\n"}, {0}, options); -} -#endif Decimal128 Dec128(util::string_view value) { Decimal128 dec; diff --git a/cpp/src/arrow/engine/CMakeLists.txt b/cpp/src/arrow/engine/CMakeLists.txt index 8edd22900e6cb..4109b7b3bcdf3 100644 --- a/cpp/src/arrow/engine/CMakeLists.txt +++ b/cpp/src/arrow/engine/CMakeLists.txt @@ -23,9 +23,10 @@ set(ARROW_SUBSTRAIT_SRCS substrait/expression_internal.cc substrait/extension_set.cc substrait/extension_types.cc - substrait/serde.cc substrait/plan_internal.cc substrait/relation_internal.cc + substrait/serde.cc + substrait/test_plan_builder.cc substrait/type_internal.cc substrait/util.cc) @@ -67,6 +68,7 @@ endif() add_arrow_test(substrait_test SOURCES substrait/ext_test.cc + substrait/function_test.cc substrait/serde_test.cc EXTRA_LINK_LIBS ${ARROW_SUBSTRAIT_TEST_LINK_LIBS} diff --git a/cpp/src/arrow/engine/substrait/expression_internal.cc b/cpp/src/arrow/engine/substrait/expression_internal.cc index 07c222bc4cfd1..9b364741a3571 100644 --- a/cpp/src/arrow/engine/substrait/expression_internal.cc +++ b/cpp/src/arrow/engine/substrait/expression_internal.cc @@ -41,6 +41,90 @@ namespace internal { using ::arrow::internal::make_unique; } // namespace internal +Status DecodeArg(const substrait::FunctionArgument& arg, uint32_t idx, + SubstraitCall* call, const ExtensionSet& ext_set, + const ConversionOptions& conversion_options) { + if (arg.has_enum_()) { + const substrait::FunctionArgument::Enum& enum_val = arg.enum_(); + switch (enum_val.enum_kind_case()) { + case substrait::FunctionArgument::Enum::EnumKindCase::kSpecified: + call->SetEnumArg(idx, enum_val.specified()); + break; + case substrait::FunctionArgument::Enum::EnumKindCase::kUnspecified: + call->SetEnumArg(idx, util::nullopt); + break; + default: + return Status::Invalid("Unrecognized enum kind case: ", + enum_val.enum_kind_case()); + } + } else if (arg.has_value()) { + ARROW_ASSIGN_OR_RAISE(compute::Expression expr, + FromProto(arg.value(), ext_set, conversion_options)); + call->SetValueArg(idx, std::move(expr)); + } else if (arg.has_type()) { + return Status::NotImplemented("Type arguments not currently supported"); + } else { + return Status::NotImplemented("Unrecognized function argument class"); + } + return Status::OK(); +} + +Result DecodeScalarFunction( + Id id, const substrait::Expression::ScalarFunction& scalar_fn, + const ExtensionSet& ext_set, const ConversionOptions& conversion_options) { + ARROW_ASSIGN_OR_RAISE(auto output_type_and_nullable, + FromProto(scalar_fn.output_type(), ext_set, conversion_options)); + SubstraitCall call(id, output_type_and_nullable.first, output_type_and_nullable.second); + for (int i = 0; i < scalar_fn.arguments_size(); i++) { + ARROW_RETURN_NOT_OK(DecodeArg(scalar_fn.arguments(i), 
static_cast(i), &call, + ext_set, conversion_options)); + } + return std::move(call); +} + +std::string EnumToString(int value, const google::protobuf::EnumDescriptor* descriptor) { + const google::protobuf::EnumValueDescriptor* value_desc = + descriptor->FindValueByNumber(value); + if (value_desc == nullptr) { + return "unknown"; + } + return value_desc->name(); +} + +Result FromProto(const substrait::AggregateFunction& func, bool is_hash, + const ExtensionSet& ext_set, + const ConversionOptions& conversion_options) { + if (func.phase() != substrait::AggregationPhase::AGGREGATION_PHASE_INITIAL_TO_RESULT) { + return Status::NotImplemented( + "Unsupported aggregation phase '", + EnumToString(func.phase(), substrait::AggregationPhase_descriptor()), + "'. Only INITIAL_TO_RESULT is supported"); + } + if (func.invocation() != + substrait::AggregateFunction::AggregationInvocation:: + AggregateFunction_AggregationInvocation_AGGREGATION_INVOCATION_ALL) { + return Status::NotImplemented( + "Unsupported aggregation invocation '", + EnumToString(func.invocation(), + substrait::AggregateFunction::AggregationInvocation_descriptor()), + "'. Only AGGREGATION_INVOCATION_ALL is " + "supported"); + } + if (func.sorts_size() > 0) { + return Status::NotImplemented("Aggregation sorts are not supported"); + } + ARROW_ASSIGN_OR_RAISE(auto output_type_and_nullable, + FromProto(func.output_type(), ext_set, conversion_options)); + ARROW_ASSIGN_OR_RAISE(Id id, ext_set.DecodeFunction(func.function_reference())); + SubstraitCall call(id, output_type_and_nullable.first, output_type_and_nullable.second, + is_hash); + for (int i = 0; i < func.arguments_size(); i++) { + ARROW_RETURN_NOT_OK(DecodeArg(func.arguments(i), static_cast(i), &call, + ext_set, conversion_options)); + } + return std::move(call); +} + Result FromProto(const substrait::Expression& expr, const ExtensionSet& ext_set, const ConversionOptions& conversion_options) { @@ -166,34 +250,14 @@ Result FromProto(const substrait::Expression& expr, case substrait::Expression::kScalarFunction: { const auto& scalar_fn = expr.scalar_function(); - ARROW_ASSIGN_OR_RAISE(auto decoded_function, + ARROW_ASSIGN_OR_RAISE(Id function_id, ext_set.DecodeFunction(scalar_fn.function_reference())); - - std::vector arguments(scalar_fn.arguments_size()); - for (int i = 0; i < scalar_fn.arguments_size(); ++i) { - const auto& argument = scalar_fn.arguments(i); - switch (argument.arg_type_case()) { - case substrait::FunctionArgument::kValue: { - ARROW_ASSIGN_OR_RAISE( - arguments[i], FromProto(argument.value(), ext_set, conversion_options)); - break; - } - default: - return Status::NotImplemented( - "only value arguments are currently supported for functions"); - } - } - - auto func_name = decoded_function.name.to_string(); - if (func_name != "cast") { - return compute::call(func_name, std::move(arguments)); - } else { - ARROW_ASSIGN_OR_RAISE( - auto output_type_desc, - FromProto(scalar_fn.output_type(), ext_set, conversion_options)); - auto cast_options = compute::CastOptions::Safe(std::move(output_type_desc.first)); - return compute::call(func_name, std::move(arguments), std::move(cast_options)); - } + ARROW_ASSIGN_OR_RAISE(ExtensionIdRegistry::SubstraitCallToArrow function_converter, + ext_set.registry()->GetSubstraitCallToArrow(function_id)); + ARROW_ASSIGN_OR_RAISE( + SubstraitCall substrait_call, + DecodeScalarFunction(function_id, scalar_fn, ext_set, conversion_options)); + return function_converter(substrait_call); } default: @@ -827,6 +891,42 @@ static Result> 
MakeListElementReference( return MakeDirectReference(std::move(expr), std::move(ref_segment)); } +Result> EncodeSubstraitCall( + const SubstraitCall& call, ExtensionSet* ext_set, + const ConversionOptions& conversion_options) { + ARROW_ASSIGN_OR_RAISE(uint32_t anchor, ext_set->EncodeFunction(call.id())); + auto scalar_fn = internal::make_unique(); + scalar_fn->set_function_reference(anchor); + ARROW_ASSIGN_OR_RAISE( + std::unique_ptr output_type, + ToProto(*call.output_type(), call.output_nullable(), ext_set, conversion_options)); + scalar_fn->set_allocated_output_type(output_type.release()); + + for (uint32_t i = 0; i < call.size(); i++) { + substrait::FunctionArgument* arg = scalar_fn->add_arguments(); + if (call.HasEnumArg(i)) { + auto enum_val = internal::make_unique(); + ARROW_ASSIGN_OR_RAISE(util::optional enum_arg, + call.GetEnumArg(i)); + if (enum_arg) { + enum_val->set_specified(enum_arg->to_string()); + } else { + enum_val->set_allocated_unspecified(new google::protobuf::Empty()); + } + arg->set_allocated_enum_(enum_val.release()); + } else if (call.HasValueArg(i)) { + ARROW_ASSIGN_OR_RAISE(compute::Expression value_arg, call.GetValueArg(i)); + ARROW_ASSIGN_OR_RAISE(std::unique_ptr value_expr, + ToProto(value_arg, ext_set, conversion_options)); + arg->set_allocated_value(value_expr.release()); + } else { + return Status::Invalid("Call reported having ", call.size(), + " arguments but no argument could be found at index ", i); + } + } + return std::move(scalar_fn); +} + Result> ToProto( const compute::Expression& expr, ExtensionSet* ext_set, const ConversionOptions& conversion_options) { @@ -933,17 +1033,12 @@ Result> ToProto( } // other expression types dive into extensions immediately - ARROW_ASSIGN_OR_RAISE(auto anchor, ext_set->EncodeFunction(call->function_name)); - - auto scalar_fn = internal::make_unique(); - scalar_fn->set_function_reference(anchor); - scalar_fn->mutable_arguments()->Reserve(static_cast(arguments.size())); - for (auto& arg : arguments) { - auto argument = internal::make_unique(); - argument->set_allocated_value(arg.release()); - scalar_fn->mutable_arguments()->AddAllocated(argument.release()); - } - + ARROW_ASSIGN_OR_RAISE( + ExtensionIdRegistry::ArrowToSubstraitCall converter, + ext_set->registry()->GetArrowToSubstraitCall(call->function_name)); + ARROW_ASSIGN_OR_RAISE(SubstraitCall substrait_call, converter(*call)); + ARROW_ASSIGN_OR_RAISE(std::unique_ptr scalar_fn, + EncodeSubstraitCall(substrait_call, ext_set, conversion_options)); out->set_allocated_scalar_function(scalar_fn.release()); return std::move(out); } diff --git a/cpp/src/arrow/engine/substrait/expression_internal.h b/cpp/src/arrow/engine/substrait/expression_internal.h index 2b4dec2a00b21..f132afc0c1ac9 100644 --- a/cpp/src/arrow/engine/substrait/expression_internal.h +++ b/cpp/src/arrow/engine/substrait/expression_internal.h @@ -50,5 +50,9 @@ Result> ToProto(const Datum&, ExtensionSet*, const ConversionOptions&); +ARROW_ENGINE_EXPORT +Result FromProto(const substrait::AggregateFunction&, bool is_hash, + const ExtensionSet&, const ConversionOptions&); + } // namespace engine } // namespace arrow diff --git a/cpp/src/arrow/engine/substrait/ext_test.cc b/cpp/src/arrow/engine/substrait/ext_test.cc index 8e41cb7c98cee..4b37aa8fcdba3 100644 --- a/cpp/src/arrow/engine/substrait/ext_test.cc +++ b/cpp/src/arrow/engine/substrait/ext_test.cc @@ -56,12 +56,10 @@ struct DefaultExtensionIdRegistryProvider : public ExtensionIdRegistryProvider { struct NestedExtensionIdRegistryProvider : public 
ExtensionIdRegistryProvider { virtual ~NestedExtensionIdRegistryProvider() {} - std::shared_ptr registry_ = substrait::MakeExtensionIdRegistry(); + std::shared_ptr registry_ = MakeExtensionIdRegistry(); ExtensionIdRegistry* get() const override { return &*registry_; } }; -using Id = ExtensionIdRegistry::Id; - bool operator==(const Id& id1, const Id& id2) { return id1.uri == id2.uri && id1.name == id2.name; } @@ -85,8 +83,8 @@ static const std::vector kTypeNames = { TypeName{month_day_nano_interval(), "interval_month_day_nano"}, }; -static const std::vector kFunctionNames = { - "add", +static const std::vector kFunctionIds = { + {kSubstraitArithmeticFunctionsUri, "add"}, }; static const std::vector kTempFunctionNames = { @@ -141,15 +139,12 @@ TEST_P(ExtensionIdRegistryTest, GetFunctions) { auto provider = std::get<0>(GetParam()); auto registry = provider->get(); - for (util::string_view name : kFunctionNames) { - auto id = Id{kArrowExtTypesUri, name}; - for (auto funcrec_opt : {registry->GetFunction(id), registry->GetFunction(name)}) { - ASSERT_TRUE(funcrec_opt); - auto funcrec = funcrec_opt.value(); - ASSERT_EQ(id, funcrec.id); - ASSERT_EQ(name, funcrec.function_name); - } + for (Id func_id : kFunctionIds) { + ASSERT_OK_AND_ASSIGN(ExtensionIdRegistry::SubstraitCallToArrow converter, + registry->GetSubstraitCallToArrow(func_id)); + ASSERT_TRUE(converter); } + ASSERT_RAISES(NotImplemented, registry->GetSubstraitCallToArrow(kNonExistentId)); ASSERT_FALSE(registry->GetType(kNonExistentId)); ASSERT_FALSE(registry->GetType(*kNonExistentTypeName.type)); } @@ -158,10 +153,10 @@ TEST_P(ExtensionIdRegistryTest, ReregisterFunctions) { auto provider = std::get<0>(GetParam()); auto registry = provider->get(); - for (util::string_view name : kFunctionNames) { - auto id = Id{kArrowExtTypesUri, name}; - ASSERT_RAISES(Invalid, registry->CanRegisterFunction(id, name.to_string())); - ASSERT_RAISES(Invalid, registry->RegisterFunction(id, name.to_string())); + for (Id function_id : kFunctionIds) { + ASSERT_RAISES(Invalid, registry->CanAddSubstraitCallToArrow(function_id)); + ASSERT_RAISES(Invalid, registry->AddSubstraitCallToArrow( + function_id, function_id.name.to_string())); } } @@ -173,11 +168,26 @@ INSTANTIATE_TEST_SUITE_P( std::make_tuple(std::make_shared(), "nested"))); +TEST(ExtensionIdRegistryTest, GetSupportedSubstraitFunctions) { + ExtensionIdRegistry* default_registry = default_extension_id_registry(); + std::vector supported_functions = + default_registry->GetSupportedSubstraitFunctions(); + std::size_t num_functions = supported_functions.size(); + ASSERT_GT(num_functions, 0); + + std::shared_ptr nested = + nested_extension_id_registry(default_registry); + ASSERT_OK(nested->AddSubstraitCallToArrow(kNonExistentId, "some_function")); + + std::size_t num_nested_functions = nested->GetSupportedSubstraitFunctions().size(); + ASSERT_EQ(num_functions + 1, num_nested_functions); +} + TEST(ExtensionIdRegistryTest, RegisterTempTypes) { auto default_registry = default_extension_id_registry(); constexpr int rounds = 3; for (int i = 0; i < rounds; i++) { - auto registry = substrait::MakeExtensionIdRegistry(); + auto registry = MakeExtensionIdRegistry(); for (TypeName e : kTempTypeNames) { auto id = Id{kArrowExtTypesUri, e.name}; @@ -194,15 +204,15 @@ TEST(ExtensionIdRegistryTest, RegisterTempFunctions) { auto default_registry = default_extension_id_registry(); constexpr int rounds = 3; for (int i = 0; i < rounds; i++) { - auto registry = substrait::MakeExtensionIdRegistry(); + auto registry = 
MakeExtensionIdRegistry(); for (util::string_view name : kTempFunctionNames) { auto id = Id{kArrowExtTypesUri, name}; - ASSERT_OK(registry->CanRegisterFunction(id, name.to_string())); - ASSERT_OK(registry->RegisterFunction(id, name.to_string())); - ASSERT_RAISES(Invalid, registry->CanRegisterFunction(id, name.to_string())); - ASSERT_RAISES(Invalid, registry->RegisterFunction(id, name.to_string())); - ASSERT_OK(default_registry->CanRegisterFunction(id, name.to_string())); + ASSERT_OK(registry->CanAddSubstraitCallToArrow(id)); + ASSERT_OK(registry->AddSubstraitCallToArrow(id, name.to_string())); + ASSERT_RAISES(Invalid, registry->CanAddSubstraitCallToArrow(id)); + ASSERT_RAISES(Invalid, registry->AddSubstraitCallToArrow(id, name.to_string())); + ASSERT_OK(default_registry->CanAddSubstraitCallToArrow(id)); } } } @@ -246,24 +256,24 @@ TEST(ExtensionIdRegistryTest, RegisterNestedFunctions) { auto default_registry = default_extension_id_registry(); constexpr int rounds = 3; for (int i = 0; i < rounds; i++) { - auto registry1 = substrait::MakeExtensionIdRegistry(); + auto registry1 = MakeExtensionIdRegistry(); - ASSERT_OK(registry1->CanRegisterFunction(id1, name1.to_string())); - ASSERT_OK(registry1->RegisterFunction(id1, name1.to_string())); + ASSERT_OK(registry1->CanAddSubstraitCallToArrow(id1)); + ASSERT_OK(registry1->AddSubstraitCallToArrow(id1, name1.to_string())); for (int j = 0; j < rounds; j++) { - auto registry2 = substrait::MakeExtensionIdRegistry(); + auto registry2 = MakeExtensionIdRegistry(); - ASSERT_OK(registry2->CanRegisterFunction(id2, name2.to_string())); - ASSERT_OK(registry2->RegisterFunction(id2, name2.to_string())); - ASSERT_RAISES(Invalid, registry2->CanRegisterFunction(id2, name2.to_string())); - ASSERT_RAISES(Invalid, registry2->RegisterFunction(id2, name2.to_string())); - ASSERT_OK(default_registry->CanRegisterFunction(id2, name2.to_string())); + ASSERT_OK(registry2->CanAddSubstraitCallToArrow(id2)); + ASSERT_OK(registry2->AddSubstraitCallToArrow(id2, name2.to_string())); + ASSERT_RAISES(Invalid, registry2->CanAddSubstraitCallToArrow(id2)); + ASSERT_RAISES(Invalid, registry2->AddSubstraitCallToArrow(id2, name2.to_string())); + ASSERT_OK(default_registry->CanAddSubstraitCallToArrow(id2)); } - ASSERT_RAISES(Invalid, registry1->CanRegisterFunction(id1, name1.to_string())); - ASSERT_RAISES(Invalid, registry1->RegisterFunction(id1, name1.to_string())); - ASSERT_OK(default_registry->CanRegisterFunction(id1, name1.to_string())); + ASSERT_RAISES(Invalid, registry1->CanAddSubstraitCallToArrow(id1)); + ASSERT_RAISES(Invalid, registry1->AddSubstraitCallToArrow(id1, name1.to_string())); + ASSERT_OK(default_registry->CanAddSubstraitCallToArrow(id1)); } } diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc index 08eb6acc9ca89..493d576e839bb 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.cc +++ b/cpp/src/arrow/engine/substrait/extension_set.cc @@ -17,9 +17,9 @@ #include "arrow/engine/substrait/extension_set.h" -#include -#include +#include +#include "arrow/engine/substrait/expression_internal.h" #include "arrow/util/hash_util.h" #include "arrow/util/hashing.h" #include "arrow/util/string_view.h" @@ -28,6 +28,9 @@ namespace arrow { namespace engine { namespace { +// TODO(ARROW-16988): replace this with EXACT_ROUNDTRIP mode +constexpr bool kExactRoundTrip = true; + struct TypePtrHashEq { template size_t operator()(const Ptr& type) const { @@ -42,16 +45,115 @@ struct TypePtrHashEq { } // namespace -size_t 
ExtensionIdRegistry::IdHashEq::operator()(ExtensionIdRegistry::Id id) const { +std::string Id::ToString() const { + std::stringstream sstream; + sstream << uri; + sstream << '#'; + sstream << name; + return sstream.str(); +} + +size_t IdHashEq::operator()(Id id) const { constexpr ::arrow::internal::StringViewHash hash = {}; auto out = static_cast(hash(id.uri)); ::arrow::internal::hash_combine(out, hash(id.name)); return out; } -bool ExtensionIdRegistry::IdHashEq::operator()(ExtensionIdRegistry::Id l, - ExtensionIdRegistry::Id r) const { - return l.uri == r.uri && l.name == r.name; +bool IdHashEq::operator()(Id l, Id r) const { return l.uri == r.uri && l.name == r.name; } + +Id IdStorage::Emplace(Id id) { + util::string_view owned_uri = EmplaceUri(id.uri); + + util::string_view owned_name; + auto name_itr = names_.find(id.name); + if (name_itr == names_.end()) { + owned_names_.emplace_back(id.name); + owned_name = owned_names_.back(); + names_.insert(owned_name); + } else { + owned_name = *name_itr; + } + + return {owned_uri, owned_name}; +} + +util::optional IdStorage::Find(Id id) const { + util::optional maybe_owned_uri = FindUri(id.uri); + if (!maybe_owned_uri) { + return util::nullopt; + } + + auto name_itr = names_.find(id.name); + if (name_itr == names_.end()) { + return util::nullopt; + } else { + return Id{*maybe_owned_uri, *name_itr}; + } +} + +util::optional IdStorage::FindUri(util::string_view uri) const { + auto uri_itr = uris_.find(uri); + if (uri_itr == uris_.end()) { + return util::nullopt; + } + return *uri_itr; +} + +util::string_view IdStorage::EmplaceUri(util::string_view uri) { + auto uri_itr = uris_.find(uri); + if (uri_itr == uris_.end()) { + owned_uris_.emplace_back(uri); + util::string_view owned_uri = owned_uris_.back(); + uris_.insert(owned_uri); + return owned_uri; + } + return *uri_itr; +} + +Result> SubstraitCall::GetEnumArg( + uint32_t index) const { + if (index >= size_) { + return Status::Invalid("Expected Substrait call to have an enum argument at index ", + index, " but it did not have enough arguments"); + } + auto enum_arg_it = enum_args_.find(index); + if (enum_arg_it == enum_args_.end()) { + return Status::Invalid("Expected Substrait call to have an enum argument at index ", + index, " but the argument was not an enum."); + } + return enum_arg_it->second; +} + +bool SubstraitCall::HasEnumArg(uint32_t index) const { + return enum_args_.find(index) != enum_args_.end(); +} + +void SubstraitCall::SetEnumArg(uint32_t index, util::optional enum_arg) { + size_ = std::max(size_, index + 1); + enum_args_[index] = std::move(enum_arg); +} + +Result SubstraitCall::GetValueArg(uint32_t index) const { + if (index >= size_) { + return Status::Invalid("Expected Substrait call to have a value argument at index ", + index, " but it did not have enough arguments"); + } + auto value_arg_it = value_args_.find(index); + if (value_arg_it == value_args_.end()) { + return Status::Invalid("Expected Substrait call to have a value argument at index ", + index, " but the argument was not a value"); + } + return value_arg_it->second; +} + +bool SubstraitCall::HasValueArg(uint32_t index) const { + return value_args_.find(index) != value_args_.end(); +} + +void SubstraitCall::SetValueArg(uint32_t index, compute::Expression value_arg) { + size_ = std::max(size_, index + 1); + value_args_[index] = std::move(value_arg); } // A builder used when creating a Substrait plan from an Arrow execution plan. 
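For illustration only (not part of this patch), a minimal sketch of building and reading back a SubstraitCall with the accessors defined above; the function id, output type, and argument expressions are invented for the example, and the headers/namespaces are the ones introduced by this change:

#include "arrow/compute/exec/expression.h"
#include "arrow/engine/substrait/extension_set.h"
#include "arrow/type.h"

namespace cp = arrow::compute;
namespace eng = arrow::engine;

arrow::Status SubstraitCallSketch() {
  // "add" from the Substrait arithmetic extension; int32 and nullable=true are
  // illustrative choices only.
  eng::SubstraitCall call({eng::kSubstraitArithmeticFunctionsUri, "add"},
                          arrow::int32(), /*output_nullable=*/true);
  call.SetEnumArg(0, "SILENT");             // enum arguments are stored as strings
  call.SetValueArg(1, cp::field_ref("x"));  // value arguments are Arrow expressions
  call.SetValueArg(2, cp::literal(1));
  // size() is one past the largest argument index set above, i.e. 3 here
  ARROW_ASSIGN_OR_RAISE(auto overflow, call.GetEnumArg(0));        // "SILENT"
  ARROW_ASSIGN_OR_RAISE(cp::Expression lhs, call.GetValueArg(1));
  // GetEnumArg(1) would return Status::Invalid because index 1 holds a value argument
  (void)overflow;
  (void)lhs;
  return arrow::Status::OK();
}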
In @@ -97,54 +199,54 @@ Result ExtensionSet::Make( std::unordered_map uris, std::unordered_map type_ids, std::unordered_map function_ids, const ExtensionIdRegistry* registry) { - ExtensionSet set; + ExtensionSet set(default_extension_id_registry()); set.registry_ = registry; - // TODO(bkietz) move this into the registry as registry->OwnUris(&uris) or so - std::unordered_set - uris_owned_by_registry; - for (util::string_view uri : registry->Uris()) { - uris_owned_by_registry.insert(uri); - } - for (auto& uri : uris) { - auto it = uris_owned_by_registry.find(uri.second); - if (it == uris_owned_by_registry.end()) { - return Status::KeyError("Uri '", uri.second, "' not found in registry"); + util::optional maybe_uri_internal = registry->FindUri(uri.second); + if (maybe_uri_internal) { + set.uris_[uri.first] = *maybe_uri_internal; + } else { + if (kExactRoundTrip) { + return Status::Invalid( + "Plan contained a URI that the extension registry is unaware of: ", + uri.second); + } + set.uris_[uri.first] = set.plan_specific_ids_.EmplaceUri(uri.second); } - uri.second = *it; // Ensure uris point into the registry's memory - set.AddUri(uri); } set.types_.reserve(type_ids.size()); + for (const auto& type_id : type_ids) { + if (type_id.second.empty()) continue; + RETURN_NOT_OK(set.CheckHasUri(type_id.second.uri)); - for (unsigned int i = 0; i < static_cast(type_ids.size()); ++i) { - if (type_ids[i].empty()) continue; - RETURN_NOT_OK(set.CheckHasUri(type_ids[i].uri)); - - if (auto rec = registry->GetType(type_ids[i])) { - set.types_[i] = {rec->id, rec->type}; + if (auto rec = registry->GetType(type_id.second)) { + set.types_[type_id.first] = {rec->id, rec->type}; continue; } - return Status::Invalid("Type ", type_ids[i].uri, "#", type_ids[i].name, " not found"); + return Status::Invalid("Type ", type_id.second.uri, "#", type_id.second.name, + " not found"); } set.functions_.reserve(function_ids.size()); - - for (unsigned int i = 0; i < static_cast(function_ids.size()); ++i) { - if (function_ids[i].empty()) continue; - RETURN_NOT_OK(set.CheckHasUri(function_ids[i].uri)); - - if (auto rec = registry->GetFunction(function_ids[i])) { - set.functions_[i] = {rec->id, rec->function_name}; - continue; + for (const auto& function_id : function_ids) { + if (function_id.second.empty()) continue; + RETURN_NOT_OK(set.CheckHasUri(function_id.second.uri)); + util::optional maybe_id_internal = registry->FindId(function_id.second); + if (maybe_id_internal) { + set.functions_[function_id.first] = *maybe_id_internal; + } else { + if (kExactRoundTrip) { + return Status::Invalid( + "Plan contained a function id that the extension registry is unaware of: ", + function_id.second.uri, "#", function_id.second.name); + } + set.functions_[function_id.first] = + set.plan_specific_ids_.Emplace(function_id.second); } - return Status::Invalid("Function ", function_ids[i].uri, "#", function_ids[i].name, - " not found"); } - set.uris_ = std::move(uris); - return std::move(set); } @@ -162,39 +264,34 @@ Result ExtensionSet::EncodeType(const DataType& type) { auto it_success = types_map_.emplace(rec->id, static_cast(types_map_.size())); if (it_success.second) { - DCHECK_EQ(types_.find(static_cast(types_.size())), types_.end()) + DCHECK_EQ(types_.find(static_cast(types_.size())), types_.end()) << "Type existed in types_ but not types_map_. 
ExtensionSet is inconsistent"; - types_[static_cast(types_.size())] = {rec->id, rec->type}; + types_[static_cast(types_.size())] = {rec->id, rec->type}; } return it_success.first->second; } return Status::KeyError("type ", type.ToString(), " not found in the registry"); } -Result ExtensionSet::DecodeFunction(uint32_t anchor) const { - if (functions_.find(anchor) == functions_.end() || functions_.at(anchor).id.empty()) { +Result ExtensionSet::DecodeFunction(uint32_t anchor) const { + if (functions_.find(anchor) == functions_.end() || functions_.at(anchor).empty()) { return Status::Invalid("User defined function reference ", anchor, " did not have a corresponding anchor in the extension set"); } return functions_.at(anchor); } -Result ExtensionSet::EncodeFunction(util::string_view function_name) { - if (auto rec = registry_->GetFunction(function_name)) { - RETURN_NOT_OK(this->AddUri(rec->id)); - auto it_success = - functions_map_.emplace(rec->id, static_cast(functions_map_.size())); - if (it_success.second) { - DCHECK_EQ(functions_.find(static_cast(functions_.size())), - functions_.end()) - << "Function existed in functions_ but not functions_map_. ExtensionSet is " - "inconsistent"; - functions_[static_cast(functions_.size())] = {rec->id, - rec->function_name}; - } - return it_success.first->second; +Result ExtensionSet::EncodeFunction(Id function_id) { + RETURN_NOT_OK(this->AddUri(function_id)); + auto it_success = + functions_map_.emplace(function_id, static_cast(functions_map_.size())); + if (it_success.second) { + DCHECK_EQ(functions_.find(static_cast(functions_.size())), functions_.end()) + << "Function existed in functions_ but not functions_map_. ExtensionSet is " + "inconsistent"; + functions_[static_cast(functions_.size())] = function_id; } - return Status::KeyError("function ", function_name, " not found in the registry"); + return it_success.first->second; } template @@ -207,16 +304,38 @@ const int* GetIndex(const KeyToIndex& key_to_index, const Key& key) { namespace { struct ExtensionIdRegistryImpl : ExtensionIdRegistry { + ExtensionIdRegistryImpl() : parent_(nullptr) {} + explicit ExtensionIdRegistryImpl(const ExtensionIdRegistry* parent) : parent_(parent) {} + virtual ~ExtensionIdRegistryImpl() {} - std::vector Uris() const override { - return {uris_.begin(), uris_.end()}; + util::optional FindUri(util::string_view uri) const override { + if (parent_) { + util::optional parent_uri = parent_->FindUri(uri); + if (parent_uri) { + return parent_uri; + } + } + return ids_.FindUri(uri); + } + + util::optional FindId(Id id) const override { + if (parent_) { + util::optional parent_id = parent_->FindId(id); + if (parent_id) { + return parent_id; + } + } + return ids_.Find(id); } util::optional GetType(const DataType& type) const override { if (auto index = GetIndex(type_to_index_, &type)) { return TypeRecord{type_ids_[*index], types_[*index]}; } + if (parent_) { + return parent_->GetType(type); + } return {}; } @@ -224,6 +343,9 @@ struct ExtensionIdRegistryImpl : ExtensionIdRegistry { if (auto index = GetIndex(id_to_index_, id)) { return TypeRecord{type_ids_[*index], types_[*index]}; } + if (parent_) { + return parent_->GetType(id); + } return {}; } @@ -234,14 +356,20 @@ struct ExtensionIdRegistryImpl : ExtensionIdRegistry { if (type_to_index_.find(&*type) != type_to_index_.end()) { return Status::Invalid("Type was already registered"); } + if (parent_) { + return parent_->CanRegisterType(id, type); + } return Status::OK(); } Status RegisterType(Id id, std::shared_ptr type) override { 
DCHECK_EQ(type_ids_.size(), types_.size()); - Id copied_id{*uris_.emplace(id.uri.to_string()).first, - *names_.emplace(id.name.to_string()).first}; + if (parent_) { + ARROW_RETURN_NOT_OK(parent_->CanRegisterType(id, type)); + } + + Id copied_id = ids_.Emplace(id); auto index = static_cast(type_ids_.size()); @@ -261,155 +389,394 @@ struct ExtensionIdRegistryImpl : ExtensionIdRegistry { return Status::OK(); } - util::optional GetFunction( - util::string_view arrow_function_name) const override { - if (auto index = GetIndex(function_name_to_index_, arrow_function_name)) { - return FunctionRecord{function_ids_[*index], *function_name_ptrs_[*index]}; + Status CanAddSubstraitCallToArrow(Id substrait_function_id) const override { + if (substrait_to_arrow_.find(substrait_function_id) != substrait_to_arrow_.end()) { + return Status::Invalid("Cannot register function converter for Substrait id ", + substrait_function_id.ToString(), + " because a converter already exists"); } - return {}; + if (parent_) { + return parent_->CanAddSubstraitCallToArrow(substrait_function_id); + } + return Status::OK(); } - util::optional GetFunction(Id id) const override { - if (auto index = GetIndex(function_id_to_index_, id)) { - return FunctionRecord{function_ids_[*index], *function_name_ptrs_[*index]}; + Status CanAddSubstraitAggregateToArrow(Id substrait_function_id) const override { + if (substrait_to_arrow_agg_.find(substrait_function_id) != + substrait_to_arrow_agg_.end()) { + return Status::Invalid( + "Cannot register aggregate function converter for Substrait id ", + substrait_function_id.ToString(), + " because an aggregate converter already exists"); } - return {}; + if (parent_) { + return parent_->CanAddSubstraitAggregateToArrow(substrait_function_id); + } + return Status::OK(); + } + + template + Status AddSubstraitToArrowFunc( + Id substrait_id, ConverterType conversion_func, + std::unordered_map* dest) { + // Convert id to view into registry-owned memory + Id copied_id = ids_.Emplace(substrait_id); + + auto add_result = dest->emplace(copied_id, std::move(conversion_func)); + if (!add_result.second) { + return Status::Invalid( + "Failed to register Substrait to Arrow function converter because a converter " + "already existed for Substrait id ", + substrait_id.ToString()); + } + + return Status::OK(); + } + + Status AddSubstraitCallToArrow(Id substrait_function_id, + SubstraitCallToArrow conversion_func) override { + if (parent_) { + ARROW_RETURN_NOT_OK(parent_->CanAddSubstraitCallToArrow(substrait_function_id)); + } + return AddSubstraitToArrowFunc( + substrait_function_id, std::move(conversion_func), &substrait_to_arrow_); } - Status CanRegisterFunction(Id id, - const std::string& arrow_function_name) const override { - if (function_id_to_index_.find(id) != function_id_to_index_.end()) { - return Status::Invalid("Function id was already registered"); + Status AddSubstraitAggregateToArrow( + Id substrait_function_id, SubstraitAggregateToArrow conversion_func) override { + if (parent_) { + ARROW_RETURN_NOT_OK( + parent_->CanAddSubstraitAggregateToArrow(substrait_function_id)); } - if (function_name_to_index_.find(arrow_function_name) != - function_name_to_index_.end()) { - return Status::Invalid("Function name was already registered"); + return AddSubstraitToArrowFunc( + substrait_function_id, std::move(conversion_func), &substrait_to_arrow_agg_); + } + + template + Status AddArrowToSubstraitFunc(std::string arrow_function_name, ConverterType converter, + std::unordered_map* dest) { + auto add_result = 
dest->emplace(std::move(arrow_function_name), std::move(converter)); + if (!add_result.second) { + return Status::Invalid( + "Failed to register Arrow to Substrait function converter for Arrow function ", + arrow_function_name, " because a converter already existed"); } return Status::OK(); } - Status RegisterFunction(Id id, std::string arrow_function_name) override { - DCHECK_EQ(function_ids_.size(), function_name_ptrs_.size()); + Status AddArrowToSubstraitCall(std::string arrow_function_name, + ArrowToSubstraitCall converter) override { + if (parent_) { + ARROW_RETURN_NOT_OK(parent_->CanAddArrowToSubstraitCall(arrow_function_name)); + } + return AddArrowToSubstraitFunc(std::move(arrow_function_name), converter, + &arrow_to_substrait_); + } - Id copied_id{*uris_.emplace(id.uri.to_string()).first, - *names_.emplace(id.name.to_string()).first}; + Status AddArrowToSubstraitAggregate(std::string arrow_function_name, + ArrowToSubstraitAggregate converter) override { + if (parent_) { + ARROW_RETURN_NOT_OK(parent_->CanAddArrowToSubstraitAggregate(arrow_function_name)); + } + return AddArrowToSubstraitFunc(std::move(arrow_function_name), converter, + &arrow_to_substrait_agg_); + } - const std::string& copied_function_name{ - *function_names_.emplace(std::move(arrow_function_name)).first}; + Status CanAddArrowToSubstraitCall(const std::string& function_name) const override { + if (arrow_to_substrait_.find(function_name) != arrow_to_substrait_.end()) { + return Status::Invalid( + "Cannot register function converter because a converter already exists"); + } + if (parent_) { + return parent_->CanAddArrowToSubstraitCall(function_name); + } + return Status::OK(); + } - auto index = static_cast(function_ids_.size()); + Status CanAddArrowToSubstraitAggregate( + const std::string& function_name) const override { + if (arrow_to_substrait_agg_.find(function_name) != arrow_to_substrait_agg_.end()) { + return Status::Invalid( + "Cannot register function converter because a converter already exists"); + } + if (parent_) { + return parent_->CanAddArrowToSubstraitAggregate(function_name); + } + return Status::OK(); + } - auto it_success = function_id_to_index_.emplace(copied_id, index); + Result GetSubstraitCallToArrow( + Id substrait_function_id) const override { + auto maybe_converter = substrait_to_arrow_.find(substrait_function_id); + if (maybe_converter == substrait_to_arrow_.end()) { + if (parent_) { + return parent_->GetSubstraitCallToArrow(substrait_function_id); + } + return Status::NotImplemented( + "No conversion function exists to convert the Substrait function ", + substrait_function_id.uri, "#", substrait_function_id.name, + " to an Arrow call expression"); + } + return maybe_converter->second; + } - if (!it_success.second) { - return Status::Invalid("Function id was already registered"); + Result GetSubstraitAggregateToArrow( + Id substrait_function_id) const override { + auto maybe_converter = substrait_to_arrow_agg_.find(substrait_function_id); + if (maybe_converter == substrait_to_arrow_agg_.end()) { + if (parent_) { + return parent_->GetSubstraitAggregateToArrow(substrait_function_id); + } + return Status::NotImplemented( + "No conversion function exists to convert the Substrait aggregate function ", + substrait_function_id.uri, "#", substrait_function_id.name, + " to an Arrow aggregate"); } + return maybe_converter->second; + } - if (!function_name_to_index_.emplace(copied_function_name, index).second) { - function_id_to_index_.erase(it_success.first); - return Status::Invalid("Function name 
was already registered"); + Result GetArrowToSubstraitCall( + const std::string& arrow_function_name) const override { + auto maybe_converter = arrow_to_substrait_.find(arrow_function_name); + if (maybe_converter == arrow_to_substrait_.end()) { + if (parent_) { + return parent_->GetArrowToSubstraitCall(arrow_function_name); + } + return Status::NotImplemented( + "No conversion function exists to convert the Arrow function ", + arrow_function_name, " to a Substrait call"); } + return maybe_converter->second; + } - function_name_ptrs_.push_back(&copied_function_name); - function_ids_.push_back(copied_id); - return Status::OK(); + Result GetArrowToSubstraitAggregate( + const std::string& arrow_function_name) const override { + auto maybe_converter = arrow_to_substrait_agg_.find(arrow_function_name); + if (maybe_converter == arrow_to_substrait_agg_.end()) { + if (parent_) { + return parent_->GetArrowToSubstraitAggregate(arrow_function_name); + } + return Status::NotImplemented( + "No conversion function exists to convert the Arrow aggregate ", + arrow_function_name, " to a Substrait aggregate"); + } + return maybe_converter->second; } - Status RegisterFunction(std::string uri, std::string name, - std::string arrow_function_name) override { - return RegisterFunction({uri, name}, arrow_function_name); + std::vector GetSupportedSubstraitFunctions() const override { + std::vector encoded_ids; + for (const auto& entry : substrait_to_arrow_) { + encoded_ids.push_back(entry.first.ToString()); + } + for (const auto& entry : substrait_to_arrow_agg_) { + encoded_ids.push_back(entry.first.ToString()); + } + if (parent_) { + std::vector parent_ids = parent_->GetSupportedSubstraitFunctions(); + encoded_ids.insert(encoded_ids.end(), make_move_iterator(parent_ids.begin()), + make_move_iterator(parent_ids.end())); + } + std::sort(encoded_ids.begin(), encoded_ids.end()); + return encoded_ids; } - // owning storage of uris, names, (arrow::)function_names, types - // note that storing strings like this is safe since references into an - // unordered_set are not invalidated on insertion - std::unordered_set uris_, names_, function_names_; + // Defined below since it depends on some helper functions defined below + Status AddSubstraitCallToArrow(Id substrait_function_id, + std::string arrow_function_name) override; + + // Parent registry, null for the root, non-null for nested + const ExtensionIdRegistry* parent_; + + // owning storage of ids & types + IdStorage ids_; DataTypeVector types_; + // There should only be one entry per Arrow function so there is no need + // to separate ownership and lookup + std::unordered_map arrow_to_substrait_; + std::unordered_map arrow_to_substrait_agg_; // non-owning lookup helpers - std::vector type_ids_, function_ids_; + std::vector type_ids_; std::unordered_map id_to_index_; std::unordered_map type_to_index_; - - std::vector function_name_ptrs_; - std::unordered_map function_id_to_index_; - std::unordered_map - function_name_to_index_; + std::unordered_map substrait_to_arrow_; + std::unordered_map + substrait_to_arrow_agg_; }; -struct NestedExtensionIdRegistryImpl : ExtensionIdRegistryImpl { - explicit NestedExtensionIdRegistryImpl(const ExtensionIdRegistry* parent) - : parent_(parent) {} - - virtual ~NestedExtensionIdRegistryImpl() {} +template +using EnumParser = std::function(util::optional)>; - std::vector Uris() const override { - std::vector uris = parent_->Uris(); - std::unordered_set uri_set; - uri_set.insert(uris.begin(), uris.end()); - uri_set.insert(uris_.begin(), 
uris_.end()); - return std::vector(uris); +template +EnumParser GetEnumParser(const std::vector& options) { + std::unordered_map parse_map; + for (std::size_t i = 0; i < options.size(); i++) { + parse_map[options[i]] = static_cast(i + 1); } - - util::optional GetType(const DataType& type) const override { - auto type_opt = ExtensionIdRegistryImpl::GetType(type); - if (type_opt) { - return type_opt; + return [parse_map](util::optional enum_val) -> Result { + if (!enum_val) { + // Assumes 0 is always kUnspecified in Enum + return static_cast(0); } - return parent_->GetType(type); - } - - util::optional GetType(Id id) const override { - auto type_opt = ExtensionIdRegistryImpl::GetType(id); - if (type_opt) { - return type_opt; + auto maybe_parsed = parse_map.find(enum_val->to_string()); + if (maybe_parsed == parse_map.end()) { + return Status::Invalid("The value ", *enum_val, " is not an expected enum value"); } - return parent_->GetType(id); - } + return maybe_parsed->second; + }; +} - Status CanRegisterType(Id id, const std::shared_ptr& type) const override { - return parent_->CanRegisterType(id, type) & - ExtensionIdRegistryImpl::CanRegisterType(id, type); - } +enum class TemporalComponent { kUnspecified = 0, kYear, kMonth, kDay, kSecond }; +static std::vector kTemporalComponentOptions = {"YEAR", "MONTH", "DAY", + "SECOND"}; +static EnumParser kTemporalComponentParser = + GetEnumParser(kTemporalComponentOptions); + +enum class OverflowBehavior { kUnspecified = 0, kSilent, kSaturate, kError }; +static std::vector kOverflowOptions = {"SILENT", "SATURATE", "ERROR"}; +static EnumParser kOverflowParser = + GetEnumParser(kOverflowOptions); + +template +Result ParseEnumArg(const SubstraitCall& call, uint32_t arg_index, + const EnumParser& parser) { + ARROW_ASSIGN_OR_RAISE(util::optional enum_arg, + call.GetEnumArg(arg_index)); + return parser(enum_arg); +} - Status RegisterType(Id id, std::shared_ptr type) override { - return parent_->CanRegisterType(id, type) & - ExtensionIdRegistryImpl::RegisterType(id, type); +Result> GetValueArgs(const SubstraitCall& call, + int start_index) { + std::vector expressions; + for (uint32_t index = start_index; index < call.size(); index++) { + ARROW_ASSIGN_OR_RAISE(compute::Expression arg, call.GetValueArg(index)); + expressions.push_back(arg); } + return std::move(expressions); +} - util::optional GetFunction( - util::string_view arrow_function_name) const override { - auto func_opt = ExtensionIdRegistryImpl::GetFunction(arrow_function_name); - if (func_opt) { - return func_opt; +ExtensionIdRegistry::SubstraitCallToArrow DecodeOptionlessOverflowableArithmetic( + const std::string& function_name) { + return [function_name](const SubstraitCall& call) -> Result { + ARROW_ASSIGN_OR_RAISE(OverflowBehavior overflow_behavior, + ParseEnumArg(call, 0, kOverflowParser)); + ARROW_ASSIGN_OR_RAISE(std::vector value_args, + GetValueArgs(call, 1)); + if (overflow_behavior == OverflowBehavior::kUnspecified) { + overflow_behavior = OverflowBehavior::kSilent; } - return parent_->GetFunction(arrow_function_name); - } + if (overflow_behavior == OverflowBehavior::kSilent) { + return arrow::compute::call(function_name, std::move(value_args)); + } else if (overflow_behavior == OverflowBehavior::kError) { + return arrow::compute::call(function_name + "_checked", std::move(value_args)); + } else { + return Status::NotImplemented( + "Only SILENT and ERROR arithmetic kernels are currently implemented but ", + kOverflowOptions[static_cast(overflow_behavior) - 1], " was requested"); + } + }; 
+} - util::optional GetFunction(Id id) const override { - auto func_opt = ExtensionIdRegistryImpl::GetFunction(id); - if (func_opt) { - return func_opt; +template +ExtensionIdRegistry::ArrowToSubstraitCall EncodeOptionlessOverflowableArithmetic( + Id substrait_fn_id) { + return + [substrait_fn_id](const compute::Expression::Call& call) -> Result { + // nullable=true isn't quite correct but we don't know the nullability of + // the inputs + SubstraitCall substrait_call(substrait_fn_id, call.type.GetSharedPtr(), + /*nullable=*/true); + if (kChecked) { + substrait_call.SetEnumArg(0, "ERROR"); + } else { + substrait_call.SetEnumArg(0, "SILENT"); + } + for (std::size_t i = 0; i < call.arguments.size(); i++) { + substrait_call.SetValueArg(static_cast(i + 1), call.arguments[i]); + } + return std::move(substrait_call); + }; +} + +ExtensionIdRegistry::SubstraitCallToArrow DecodeOptionlessBasicMapping( + const std::string& function_name, uint32_t max_args) { + return [function_name, + max_args](const SubstraitCall& call) -> Result { + if (call.size() > max_args) { + return Status::NotImplemented("Acero does not have a kernel for ", function_name, + " that receives ", call.size(), " arguments"); } - return parent_->GetFunction(id); - } + ARROW_ASSIGN_OR_RAISE(std::vector value_args, + GetValueArgs(call, 0)); + return arrow::compute::call(function_name, std::move(value_args)); + }; +} - Status CanRegisterFunction(Id id, - const std::string& arrow_function_name) const override { - return parent_->CanRegisterFunction(id, arrow_function_name) & - ExtensionIdRegistryImpl::CanRegisterFunction(id, arrow_function_name); - } +ExtensionIdRegistry::SubstraitCallToArrow DecodeTemporalExtractionMapping() { + return [](const SubstraitCall& call) -> Result { + ARROW_ASSIGN_OR_RAISE(TemporalComponent temporal_component, + ParseEnumArg(call, 0, kTemporalComponentParser)); + if (temporal_component == TemporalComponent::kUnspecified) { + return Status::Invalid( + "The temporal component enum is a require option for the extract function " + "and is not specified"); + } + ARROW_ASSIGN_OR_RAISE(std::vector value_args, + GetValueArgs(call, 1)); + std::string func_name; + switch (temporal_component) { + case TemporalComponent::kYear: + func_name = "year"; + break; + case TemporalComponent::kMonth: + func_name = "month"; + break; + case TemporalComponent::kDay: + func_name = "day"; + break; + case TemporalComponent::kSecond: + func_name = "second"; + break; + default: + return Status::Invalid("Unexpected value for temporal component in extract call"); + } + return compute::call(func_name, std::move(value_args)); + }; +} - Status RegisterFunction(Id id, std::string arrow_function_name) override { - return parent_->CanRegisterFunction(id, arrow_function_name) & - ExtensionIdRegistryImpl::RegisterFunction(id, arrow_function_name); - } +ExtensionIdRegistry::SubstraitCallToArrow DecodeConcatMapping() { + return [](const SubstraitCall& call) -> Result { + ARROW_ASSIGN_OR_RAISE(std::vector value_args, + GetValueArgs(call, 0)); + value_args.push_back(compute::literal("")); + return compute::call("binary_join_element_wise", std::move(value_args)); + }; +} - const ExtensionIdRegistry* parent_; -}; +ExtensionIdRegistry::SubstraitAggregateToArrow DecodeBasicAggregate( + const std::string& arrow_function_name) { + return [arrow_function_name](const SubstraitCall& call) -> Result { + if (call.size() != 1) { + return Status::NotImplemented( + "Only unary aggregate functions are currently supported"); + } + 
ARROW_ASSIGN_OR_RAISE(compute::Expression arg, call.GetValueArg(0)); + const FieldRef* arg_ref = arg.field_ref(); + if (!arg_ref) { + return Status::Invalid("Expected an aggregate call ", call.id().uri, "#", + call.id().name, " to have a direct reference"); + } + std::string fixed_arrow_func = arrow_function_name; + if (call.is_hash()) { + fixed_arrow_func = "hash_" + arrow_function_name; + } + return compute::Aggregate{std::move(fixed_arrow_func), nullptr, *arg_ref, ""}; + }; +} struct DefaultExtensionIdRegistry : ExtensionIdRegistryImpl { DefaultExtensionIdRegistry() { + // ----------- Extension Types ---------------------------- struct TypeName { std::shared_ptr type; util::string_view name; @@ -428,32 +795,91 @@ struct DefaultExtensionIdRegistry : ExtensionIdRegistryImpl { DCHECK_OK(RegisterType({kArrowExtTypesUri, e.name}, std::move(e.type))); } - for (TypeName e : { - TypeName{null(), "null"}, - TypeName{month_interval(), "interval_month"}, - TypeName{day_time_interval(), "interval_day_milli"}, - TypeName{month_day_nano_interval(), "interval_month_day_nano"}, - }) { + for (TypeName e : + {TypeName{null(), "null"}, TypeName{month_interval(), "interval_month"}, + TypeName{day_time_interval(), "interval_day_milli"}, + TypeName{month_day_nano_interval(), "interval_month_day_nano"}}) { DCHECK_OK(RegisterType({kArrowExtTypesUri, e.name}, std::move(e.type))); } - // TODO: this is just a placeholder right now. We'll need a YAML file for - // all functions (and prototypes) that Arrow provides that are relevant - // for Substrait, and include mappings for all of them here. See - // ARROW-15535. - for (util::string_view name : { - "add", - "equal", - "is_not_distinct_from", - "hash_count", - }) { - DCHECK_OK(RegisterFunction({kArrowExtTypesUri, name}, name.to_string())); + // -------------- Substrait -> Arrow Functions ----------------- + // Mappings with a _checked variant + for (const auto& function_name : {"add", "subtract", "multiply", "divide"}) { + DCHECK_OK( + AddSubstraitCallToArrow({kSubstraitArithmeticFunctionsUri, function_name}, + DecodeOptionlessOverflowableArithmetic(function_name))); + } + // Basic mappings that need _kleene appended to them + for (const auto& function_name : {"or", "and"}) { + DCHECK_OK(AddSubstraitCallToArrow( + {kSubstraitBooleanFunctionsUri, function_name}, + DecodeOptionlessBasicMapping(std::string(function_name) + "_kleene", + /*max_args=*/2))); + } + // Basic binary mappings + for (const auto& function_name : + std::vector>{ + {kSubstraitBooleanFunctionsUri, "xor"}, + {kSubstraitComparisonFunctionsUri, "equal"}, + {kSubstraitComparisonFunctionsUri, "not_equal"}}) { + DCHECK_OK( + AddSubstraitCallToArrow({function_name.first, function_name.second}, + DecodeOptionlessBasicMapping( + function_name.second.to_string(), /*max_args=*/2))); + } + for (const auto& uri : + {kSubstraitComparisonFunctionsUri, kSubstraitDatetimeFunctionsUri}) { + DCHECK_OK(AddSubstraitCallToArrow( + {uri, "lt"}, DecodeOptionlessBasicMapping("less", /*max_args=*/2))); + DCHECK_OK(AddSubstraitCallToArrow( + {uri, "lte"}, DecodeOptionlessBasicMapping("less_equal", /*max_args=*/2))); + DCHECK_OK(AddSubstraitCallToArrow( + {uri, "gt"}, DecodeOptionlessBasicMapping("greater", /*max_args=*/2))); + DCHECK_OK(AddSubstraitCallToArrow( + {uri, "gte"}, DecodeOptionlessBasicMapping("greater_equal", /*max_args=*/2))); + } + // One-off mappings + DCHECK_OK( + AddSubstraitCallToArrow({kSubstraitBooleanFunctionsUri, "not"}, + DecodeOptionlessBasicMapping("invert", /*max_args=*/1))); + 
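To make the effect of these registrations concrete, a small sketch (not part of the patch) of decoding a Substrait "add" whose overflow option is ERROR: the default registry resolves it to the Arrow "add_checked" kernel, while SILENT or an unspecified option resolves to plain "add". The argument expressions below are invented:

#include "arrow/compute/exec/expression.h"
#include "arrow/engine/substrait/extension_set.h"
#include "arrow/type.h"

namespace cp = arrow::compute;
namespace eng = arrow::engine;

arrow::Result<cp::Expression> DecodeCheckedAddSketch() {
  eng::Id add_id{eng::kSubstraitArithmeticFunctionsUri, "add"};
  ARROW_ASSIGN_OR_RAISE(
      eng::ExtensionIdRegistry::SubstraitCallToArrow decode,
      eng::default_extension_id_registry()->GetSubstraitCallToArrow(add_id));
  eng::SubstraitCall call(add_id, arrow::int8(), /*output_nullable=*/true);
  call.SetEnumArg(0, "ERROR");              // overflow behaviour enum argument
  call.SetValueArg(1, cp::field_ref("x"));
  call.SetValueArg(2, cp::field_ref("y"));
  // Returns call("add_checked", {field_ref("x"), field_ref("y")})
  return decode(call);
}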
DCHECK_OK(AddSubstraitCallToArrow({kSubstraitDatetimeFunctionsUri, "extract"}, + DecodeTemporalExtractionMapping())); + DCHECK_OK(AddSubstraitCallToArrow({kSubstraitStringFunctionsUri, "concat"}, + DecodeConcatMapping())); + + // --------------- Substrait -> Arrow Aggregates -------------- + for (const auto& fn_name : {"sum", "min", "max"}) { + DCHECK_OK(AddSubstraitAggregateToArrow({kSubstraitArithmeticFunctionsUri, fn_name}, + DecodeBasicAggregate(fn_name))); + } + DCHECK_OK(AddSubstraitAggregateToArrow({kSubstraitArithmeticFunctionsUri, "avg"}, + DecodeBasicAggregate("mean"))); + + // --------------- Arrow -> Substrait Functions --------------- + for (const auto& fn_name : {"add", "subtract", "multiply", "divide"}) { + Id fn_id{kSubstraitArithmeticFunctionsUri, fn_name}; + DCHECK_OK(AddArrowToSubstraitCall( + fn_name, EncodeOptionlessOverflowableArithmetic(fn_id))); + DCHECK_OK( + AddArrowToSubstraitCall(std::string(fn_name) + "_checked", + EncodeOptionlessOverflowableArithmetic(fn_id))); } } }; } // namespace +Status ExtensionIdRegistryImpl::AddSubstraitCallToArrow(Id substrait_function_id, + std::string arrow_function_name) { + return AddSubstraitCallToArrow( + substrait_function_id, + [arrow_function_name](const SubstraitCall& call) -> Result { + ARROW_ASSIGN_OR_RAISE(std::vector value_args, + GetValueArgs(call, 0)); + return compute::call(arrow_function_name, std::move(value_args)); + }); +} + ExtensionIdRegistry* default_extension_id_registry() { static DefaultExtensionIdRegistry impl_; return &impl_; @@ -461,7 +887,7 @@ ExtensionIdRegistry* default_extension_id_registry() { std::shared_ptr nested_extension_id_registry( const ExtensionIdRegistry* parent) { - return std::make_shared(parent); + return std::make_shared(parent); } } // namespace engine diff --git a/cpp/src/arrow/engine/substrait/extension_set.h b/cpp/src/arrow/engine/substrait/extension_set.h index 04e4586a9f5e2..9cb42f66136b9 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.h +++ b/cpp/src/arrow/engine/substrait/extension_set.h @@ -19,26 +19,130 @@ #pragma once +#include #include +#include #include +#include "arrow/compute/exec/exec_plan.h" +#include "arrow/compute/exec/expression.h" #include "arrow/engine/substrait/visibility.h" +#include "arrow/result.h" #include "arrow/type_fwd.h" +#include "arrow/util/hash_util.h" +#include "arrow/util/hashing.h" #include "arrow/util/optional.h" #include "arrow/util/string_view.h" -#include "arrow/util/hash_util.h" - namespace arrow { namespace engine { +constexpr const char* kSubstraitArithmeticFunctionsUri = + "https://github.com/substrait-io/substrait/blob/main/extensions/" + "functions_arithmetic.yaml"; +constexpr const char* kSubstraitBooleanFunctionsUri = + "https://github.com/substrait-io/substrait/blob/main/extensions/" + "functions_boolean.yaml"; +constexpr const char* kSubstraitComparisonFunctionsUri = + "https://github.com/substrait-io/substrait/blob/main/extensions/" + "functions_comparison.yaml"; +constexpr const char* kSubstraitDatetimeFunctionsUri = + "https://github.com/substrait-io/substrait/blob/main/extensions/" + "functions_datetime.yaml"; +constexpr const char* kSubstraitStringFunctionsUri = + "https://github.com/substrait-io/substrait/blob/main/extensions/" + "functions_string.yaml"; + +struct Id { + util::string_view uri, name; + bool empty() const { return uri.empty() && name.empty(); } + std::string ToString() const; +}; +struct IdHashEq { + size_t operator()(Id id) const; + bool operator()(Id l, Id r) const; +}; + +/// \brief Owning storage for 
ids +/// +/// Substrait plans may reuse URIs and names in many places. For convenience +/// and performance Substarit ids are typically passed around as views. As we +/// convert a plan from Substrait to Arrow we need to copy these strings out of +/// the Substrait buffer and into owned storage. This class serves as that owned +/// storage. +class IdStorage { + public: + /// \brief Get an equivalent id pointing into this storage + /// + /// This operation will copy the ids into storage if they do not already exist + Id Emplace(Id id); + /// \brief Get an equivalent view pointing into this storage for a URI + /// + /// If no URI is found then the uri will be copied into storage + util::string_view EmplaceUri(util::string_view uri); + /// \brief Get an equivalent id pointing into this storage + /// + /// If no id is found then nullopt will be returned + util::optional Find(Id id) const; + /// \brief Get an equivalent view pointing into this storage for a URI + /// + /// If no URI is found then nullopt will be returned + util::optional FindUri(util::string_view uri) const; + + private: + std::unordered_set uris_; + std::unordered_set names_; + std::list owned_uris_; + std::list owned_names_; +}; + +/// \brief Describes a Substrait call +/// +/// Substrait call expressions contain a list of arguments which can either +/// be enum arguments (which are serialized as strings), value arguments (which) +/// are Arrow expressions, or type arguments (not yet implemented) +class SubstraitCall { + public: + SubstraitCall(Id id, std::shared_ptr output_type, bool output_nullable, + bool is_hash = false) + : id_(id), + output_type_(std::move(output_type)), + output_nullable_(output_nullable), + is_hash_(is_hash) {} + + const Id& id() const { return id_; } + const std::shared_ptr& output_type() const { return output_type_; } + bool output_nullable() const { return output_nullable_; } + bool is_hash() const { return is_hash_; } + + bool HasEnumArg(uint32_t index) const; + Result> GetEnumArg(uint32_t index) const; + void SetEnumArg(uint32_t index, util::optional enum_arg); + Result GetValueArg(uint32_t index) const; + bool HasValueArg(uint32_t index) const; + void SetValueArg(uint32_t index, compute::Expression value_arg); + uint32_t size() const { return size_; } + + private: + Id id_; + std::shared_ptr output_type_; + bool output_nullable_; + // Only needed when converting from Substrait -> Arrow aggregates. The + // Arrow function name depends on whether or not there are any groups + bool is_hash_; + std::unordered_map> enum_args_; + std::unordered_map value_args_; + uint32_t size_ = 0; +}; + /// Substrait identifies functions and custom data types using a (uri, name) pair. /// -/// This registry is a bidirectional mapping between Substrait IDs and their corresponding -/// Arrow counterparts (arrow::DataType and function names in a function registry) +/// This registry is a bidirectional mapping between Substrait IDs and their +/// corresponding Arrow counterparts (arrow::DataType and function names in a function +/// registry) /// -/// Substrait extension types and variations must be registered with their corresponding -/// arrow::DataType before they can be used! +/// Substrait extension types and variations must be registered with their +/// corresponding arrow::DataType before they can be used! /// /// Conceptually this can be thought of as two pairs of `unordered_map`s. 
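A short usage sketch of the IdStorage class documented above (the extension URI and function name are hypothetical, and the snippet is illustrative rather than part of the patch):

#include <cassert>
#include <string>

#include "arrow/engine/substrait/extension_set.h"

void IdStorageSketch() {
  arrow::engine::IdStorage storage;
  // Hypothetical extension URI and function name, owned by short-lived strings
  std::string uri = "https://example.com/my_extensions.yaml";
  std::string name = "my_fn";
  // Emplace copies both strings into the storage and returns views into that copy,
  // so the returned Id remains valid after the local strings go away
  arrow::engine::Id owned = storage.Emplace({uri, name});
  // Find compares by value, so an Id built from different buffers still matches
  assert(storage.Find({"https://example.com/my_extensions.yaml", "my_fn"}).has_value());
  assert(!storage.Find({owned.uri, "other_fn"}).has_value());
}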
One pair to /// go back and forth between Substrait ID and arrow::DataType and another pair to go @@ -49,56 +153,103 @@ namespace engine { /// instance). class ARROW_ENGINE_EXPORT ExtensionIdRegistry { public: - /// All uris registered in this ExtensionIdRegistry - virtual std::vector Uris() const = 0; - - struct Id { - util::string_view uri, name; - - bool empty() const { return uri.empty() && name.empty(); } - }; - - struct IdHashEq { - size_t operator()(Id id) const; - bool operator()(Id l, Id r) const; - }; + using ArrowToSubstraitCall = + std::function(const arrow::compute::Expression::Call&)>; + using SubstraitCallToArrow = + std::function(const SubstraitCall&)>; + using ArrowToSubstraitAggregate = + std::function(const arrow::compute::Aggregate&)>; + using SubstraitAggregateToArrow = + std::function(const SubstraitCall&)>; /// \brief A mapping between a Substrait ID and an arrow::DataType struct TypeRecord { Id id; const std::shared_ptr& type; }; + + /// \brief Return a uri view owned by this registry + /// + /// If the URI has never been emplaced it will return nullopt + virtual util::optional FindUri(util::string_view uri) const = 0; + /// \brief Return a id view owned by this registry + /// + /// If the id has never been emplaced it will return nullopt + virtual util::optional FindId(Id id) const = 0; virtual util::optional GetType(const DataType&) const = 0; virtual util::optional GetType(Id) const = 0; virtual Status CanRegisterType(Id, const std::shared_ptr& type) const = 0; virtual Status RegisterType(Id, std::shared_ptr) = 0; + /// \brief Register a converter that converts an Arrow call to a Substrait call + /// + /// Note that there may not be 1:1 parity between ArrowToSubstraitCall and + /// SubstraitCallToArrow because some standard functions (e.g. add) may map to + /// multiple Arrow functions (e.g. add, add_checked) + virtual Status AddArrowToSubstraitCall(std::string arrow_function_name, + ArrowToSubstraitCall conversion_func) = 0; + /// \brief Check to see if a converter can be registered + /// + /// \return Status::OK if there are no conflicts, otherwise an error is returned + virtual Status CanAddArrowToSubstraitCall( + const std::string& arrow_function_name) const = 0; - /// \brief A mapping between a Substrait ID and an Arrow function + /// \brief Register a converter that converts an Arrow aggregate to a Substrait + /// aggregate + virtual Status AddArrowToSubstraitAggregate( + std::string arrow_function_name, ArrowToSubstraitAggregate conversion_func) = 0; + /// \brief Check to see if a converter can be registered /// - /// Note: At the moment we identify functions solely by the name - /// of the function in the function registry. + /// \return Status::OK if there are no conflicts, otherwise an error is returned + virtual Status CanAddArrowToSubstraitAggregate( + const std::string& arrow_function_name) const = 0; + + /// \brief Register a converter that converts a Substrait call to an Arrow call + virtual Status AddSubstraitCallToArrow(Id substrait_function_id, + SubstraitCallToArrow conversion_func) = 0; + /// \brief Check to see if a converter can be registered /// - /// TODO(ARROW-15582) some functions will not be simple enough to convert without access - /// to their arguments/options. 
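As an illustration of the Arrow-to-Substrait direction, a sketch of registering a converter for a hypothetical Arrow function "my_negate"; the function name, extension URI, and the nullable=true choice are all assumptions made for the example, mirroring the converters added elsewhere in this patch:

#include "arrow/compute/exec/expression.h"
#include "arrow/engine/substrait/extension_set.h"

namespace cp = arrow::compute;
namespace eng = arrow::engine;

arrow::Status RegisterMyNegateEncoder(eng::ExtensionIdRegistry* registry) {
  // Hypothetical extension URI/name; a real mapping would point at a published
  // Substrait extension YAML.
  eng::Id substrait_id{"https://example.com/my_extensions.yaml", "negate"};
  return registry->AddArrowToSubstraitCall(
      "my_negate",
      [substrait_id](const cp::Expression::Call& call)
          -> arrow::Result<eng::SubstraitCall> {
        // As in the other encoders in this patch, nullable=true is the
        // conservative choice when the input nullability is unknown.
        eng::SubstraitCall substrait_call(substrait_id, call.type.GetSharedPtr(),
                                          /*output_nullable=*/true);
        for (std::size_t i = 0; i < call.arguments.size(); i++) {
          substrait_call.SetValueArg(static_cast<uint32_t>(i), call.arguments[i]);
        }
        return std::move(substrait_call);
      });
}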
For example is_in embeds the set in options rather than - /// using an argument: - /// is_in(x, SetLookupOptions(set)) <-> (k...Uri, "is_in")(x, set) + /// \return Status::OK if there are no conflicts, otherwise an error is returned + virtual Status CanAddSubstraitCallToArrow(Id substrait_function_id) const = 0; + /// \brief Register a simple mapping function /// - /// ... for another example, depending on the value of the first argument to - /// substrait::add it either corresponds to arrow::add or arrow::add_checked - struct FunctionRecord { - Id id; - const std::string& function_name; - }; - virtual util::optional GetFunction(Id) const = 0; - virtual util::optional GetFunction( - util::string_view arrow_function_name) const = 0; - virtual Status CanRegisterFunction(Id, - const std::string& arrow_function_name) const = 0; - // registers a function without taking ownership of uri and name within Id - virtual Status RegisterFunction(Id, std::string arrow_function_name) = 0; - // registers a function while taking ownership of uri and name - virtual Status RegisterFunction(std::string uri, std::string name, - std::string arrow_function_name) = 0; + /// All calls to the function must pass only value arguments. The arguments + /// will be converted to expressions and passed to the Arrow function + virtual Status AddSubstraitCallToArrow(Id substrait_function_id, + std::string arrow_function_name) = 0; + + /// \brief Register a converter that converts a Substrait aggregate to an Arrow + /// aggregate + virtual Status AddSubstraitAggregateToArrow( + Id substrait_function_id, SubstraitAggregateToArrow conversion_func) = 0; + /// \brief Check to see if a converter can be registered + /// + /// \return Status::OK if there are no conflicts, otherwise an error is returned + virtual Status CanAddSubstraitAggregateToArrow(Id substrait_function_id) const = 0; + + /// \brief Return a list of Substrait functions that have a converter + /// + /// The function ids are encoded as strings using the pattern {uri}#{name} + virtual std::vector GetSupportedSubstraitFunctions() const = 0; + + /// \brief Find a converter to map Arrow calls to Substrait calls + /// \return A converter function or an invalid status if no converter is registered + virtual Result GetArrowToSubstraitCall( + const std::string& arrow_function_name) const = 0; + + /// \brief Find a converter to map Arrow aggregates to Substrait aggregates + /// \return A converter function or an invalid status if no converter is registered + virtual Result GetArrowToSubstraitAggregate( + const std::string& arrow_function_name) const = 0; + + /// \brief Find a converter to map a Substrait aggregate to an Arrow aggregate + /// \return A converter function or an invalid status if no converter is registered + virtual Result GetSubstraitAggregateToArrow( + Id substrait_function_id) const = 0; + + /// \brief Find a converter to map a Substrait call to an Arrow call + /// \return A converter function or an invalid status if no converter is registered + virtual Result GetSubstraitCallToArrow( + Id substrait_function_id) const = 0; }; constexpr util::string_view kArrowExtTypesUri = @@ -153,9 +304,6 @@ ARROW_ENGINE_EXPORT std::shared_ptr nested_extension_id_reg /// ExtensionIdRegistry. 
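A sketch tying these pieces together (the Substrait URI and function name are hypothetical): the simple AddSubstraitCallToArrow overload maps every value argument straight through to the named Arrow function, and a nested registry lets such additions be made without mutating the process-wide default registry.

#include <iostream>
#include <memory>
#include <string>

#include "arrow/engine/substrait/extension_set.h"
#include "arrow/status.h"

namespace eng = arrow::engine;

arrow::Status RegisterSimpleMappingSketch() {
  // Layer a nested registry on top of the default one
  std::shared_ptr<eng::ExtensionIdRegistry> registry =
      eng::nested_extension_id_registry(eng::default_extension_id_registry());
  // Hypothetical Substrait function decoded by passing all value arguments
  // directly to the Arrow "coalesce" kernel
  eng::Id fill_id{"https://example.com/my_extensions.yaml", "fill_null"};
  ARROW_RETURN_NOT_OK(registry->AddSubstraitCallToArrow(fill_id, "coalesce"));
  // The new mapping shows up alongside the built-in ones, encoded as "{uri}#{name}"
  for (const std::string& encoded : registry->GetSupportedSubstraitFunctions()) {
    std::cout << encoded << "\n";
  }
  return arrow::Status::OK();
}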
class ARROW_ENGINE_EXPORT ExtensionSet { public: - using Id = ExtensionIdRegistry::Id; - using IdHashEq = ExtensionIdRegistry::IdHashEq; - struct FunctionRecord { Id id; util::string_view name; @@ -219,12 +367,12 @@ class ARROW_ENGINE_EXPORT ExtensionSet { /// \return An anchor that can be used to refer to the type within a plan Result EncodeType(const DataType& type); - /// \brief Returns a function given an anchor + /// \brief Return a function id given an anchor /// /// This is used when converting a Substrait plan to an Arrow execution plan. /// /// If the anchor does not exist in this extension set an error will be returned. - Result DecodeFunction(uint32_t anchor) const; + Result DecodeFunction(uint32_t anchor) const; /// \brief Lookup the anchor for a given function /// @@ -239,26 +387,30 @@ class ARROW_ENGINE_EXPORT ExtensionSet { /// returned. /// /// \return An anchor that can be used to refer to the function within a plan - Result EncodeFunction(util::string_view function_name); + Result EncodeFunction(Id function_id); - /// \brief Returns the number of custom functions in this extension set - /// - /// Note: the functions are currently stored as a sparse vector, so this may return a - /// value larger than the actual number of functions. This behavior may change in the - /// future; see ARROW-15583. + /// \brief Return the number of custom functions in this extension set std::size_t num_functions() const { return functions_.size(); } + const ExtensionIdRegistry* registry() const { return registry_; } + private: const ExtensionIdRegistry* registry_; + // If the registry is not aware of an id then we probably can't do anything + // with it. However, in some cases, these may represent extensions or features + // that we can safely ignore. For example, we can usually safely ignore + // extension type variations if we assume the plan is valid. These ignorable + // ids are stored here. + IdStorage plan_specific_ids_; // Map from anchor values to URI values referenced by this extension set std::unordered_map uris_; // Map from anchor values to type definitions, used during Substrait->Arrow // and populated from the Substrait extension set std::unordered_map types_; - // Map from anchor values to function definitions, used during Substrait->Arrow + // Map from anchor values to function ids, used during Substrait->Arrow // and populated from the Substrait extension set - std::unordered_map functions_; + std::unordered_map functions_; // Map from type names to anchor values. Used during Arrow->Substrait // and built as the plan is created. std::unordered_map types_map_; diff --git a/cpp/src/arrow/engine/substrait/function_test.cc b/cpp/src/arrow/engine/substrait/function_test.cc new file mode 100644 index 0000000000000..225bc56d13681 --- /dev/null +++ b/cpp/src/arrow/engine/substrait/function_test.cc @@ -0,0 +1,495 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include + +#include "arrow/array.h" +#include "arrow/array/builder_binary.h" +#include "arrow/compute/cast.h" +#include "arrow/compute/exec/options.h" +#include "arrow/compute/exec/util.h" +#include "arrow/engine/substrait/extension_set.h" +#include "arrow/engine/substrait/plan_internal.h" +#include "arrow/engine/substrait/serde.h" +#include "arrow/engine/substrait/test_plan_builder.h" +#include "arrow/engine/substrait/type_internal.h" +#include "arrow/record_batch.h" +#include "arrow/table.h" +#include "arrow/testing/future_util.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/type.h" + +namespace arrow { + +namespace engine { +struct FunctionTestCase { + Id function_id; + std::vector arguments; + std::vector> data_types; + // For a test case that should fail just use the empty string + std::string expected_output; + std::shared_ptr expected_output_type; +}; + +Result> GetArray(const std::string& value, + const std::shared_ptr& data_type) { + StringBuilder str_builder; + if (value.empty()) { + ARROW_EXPECT_OK(str_builder.AppendNull()); + } else { + ARROW_EXPECT_OK(str_builder.Append(value)); + } + ARROW_ASSIGN_OR_RAISE(std::shared_ptr value_str, str_builder.Finish()); + ARROW_ASSIGN_OR_RAISE(Datum value_datum, compute::Cast(value_str, data_type)); + return value_datum.make_array(); +} + +Result> GetInputTable( + const std::vector& arguments, + const std::vector>& data_types) { + std::vector> columns; + std::vector> fields; + EXPECT_EQ(arguments.size(), data_types.size()); + for (std::size_t i = 0; i < arguments.size(); i++) { + if (data_types[i]) { + ARROW_ASSIGN_OR_RAISE(std::shared_ptr arg_array, + GetArray(arguments[i], data_types[i])); + columns.push_back(std::move(arg_array)); + fields.push_back(field("arg_" + std::to_string(i), data_types[i])); + } + } + std::shared_ptr batch = + RecordBatch::Make(schema(std::move(fields)), 1, columns); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr
table, Table::FromRecordBatches({batch})); + return table; +} +
+Result<std::shared_ptr<Table>> GetOutputTable( + const std::string& output_value, const std::shared_ptr<DataType>& output_type) { + std::vector<std::shared_ptr<Array>> columns(1); + std::vector<std::shared_ptr<Field>> fields(1); + ARROW_ASSIGN_OR_RAISE(columns[0], GetArray(output_value, output_type)); + fields[0] = field("output", output_type); + std::shared_ptr<RecordBatch> batch = + RecordBatch::Make(schema(std::move(fields)), 1, columns); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Table> table, Table::FromRecordBatches({batch})); + return table; +} +
+Result<std::shared_ptr<compute::ExecPlan>> PlanFromTestCase( + const FunctionTestCase& test_case, std::shared_ptr<Table>* output_table) { + ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Table>
input_table, + GetInputTable(test_case.arguments, test_case.data_types)); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr substrait, + internal::CreateScanProjectSubstrait( + test_case.function_id, input_table, test_case.arguments, + test_case.data_types, *test_case.expected_output_type)); + std::shared_ptr consumer = + std::make_shared(output_table, + default_memory_pool()); + + // Mock table provider that ignores the table name and returns input_table + NamedTableProvider table_provider = [input_table](const std::vector&) { + std::shared_ptr options = + std::make_shared(input_table); + return compute::Declaration("table_source", {}, options, "mock_source"); + }; + + ConversionOptions conversion_options; + conversion_options.named_table_provider = std::move(table_provider); + + ARROW_ASSIGN_OR_RAISE( + std::shared_ptr plan, + DeserializePlan(*substrait, std::move(consumer), default_extension_id_registry(), + /*ext_set_out=*/nullptr, conversion_options)); + return plan; +} + +void CheckValidTestCases(const std::vector& valid_cases) { + for (const FunctionTestCase& test_case : valid_cases) { + std::shared_ptr
output_table; + ASSERT_OK_AND_ASSIGN(std::shared_ptr plan, + PlanFromTestCase(test_case, &output_table)); + ASSERT_OK(plan->StartProducing()); + ASSERT_FINISHES_OK(plan->finished()); + + // Could also modify the Substrait plan with an emit to drop the leading columns + ASSERT_OK_AND_ASSIGN(output_table, + output_table->SelectColumns({output_table->num_columns() - 1})); + + ASSERT_OK_AND_ASSIGN( + std::shared_ptr
expected_output, + GetOutputTable(test_case.expected_output, test_case.expected_output_type)); + AssertTablesEqual(*expected_output, *output_table, /*same_chunk_layout=*/false); + } +} + +void CheckErrorTestCases(const std::vector& error_cases) { + for (const FunctionTestCase& test_case : error_cases) { + std::shared_ptr
output_table; + ASSERT_OK_AND_ASSIGN(std::shared_ptr plan, + PlanFromTestCase(test_case, &output_table)); + ASSERT_OK(plan->StartProducing()); + ASSERT_FINISHES_AND_RAISES(Invalid, plan->finished()); + } +} + +// These are not meant to be an exhaustive test of Substrait +// conformance. Instead, we should test just enough to ensure +// we are mapping to the correct function +TEST(FunctionMapping, ValidCases) { + const std::vector valid_test_cases = { + {{kSubstraitArithmeticFunctionsUri, "add"}, + {"SILENT", "127", "10"}, + {nullptr, int8(), int8()}, + "-119", + int8()}, + {{kSubstraitArithmeticFunctionsUri, "subtract"}, + {"SILENT", "-119", "10"}, + {nullptr, int8(), int8()}, + "127", + int8()}, + {{kSubstraitArithmeticFunctionsUri, "multiply"}, + {"SILENT", "10", "13"}, + {nullptr, int8(), int8()}, + "-126", + int8()}, + {{kSubstraitArithmeticFunctionsUri, "divide"}, + {"SILENT", "-128", "-1"}, + {nullptr, int8(), int8()}, + "0", + int8()}, + {{kSubstraitBooleanFunctionsUri, "or"}, + {"1", ""}, + {boolean(), boolean()}, + "1", + boolean()}, + {{kSubstraitBooleanFunctionsUri, "and"}, + {"1", ""}, + {boolean(), boolean()}, + "", + boolean()}, + {{kSubstraitBooleanFunctionsUri, "xor"}, + {"1", "1"}, + {boolean(), boolean()}, + "0", + boolean()}, + {{kSubstraitBooleanFunctionsUri, "not"}, {"1"}, {boolean()}, "0", boolean()}, + {{kSubstraitComparisonFunctionsUri, "equal"}, + {"57", "57"}, + {int8(), int8()}, + "1", + boolean()}, + {{kSubstraitComparisonFunctionsUri, "not_equal"}, + {"57", "57"}, + {int8(), int8()}, + "0", + boolean()}, + {{kSubstraitComparisonFunctionsUri, "lt"}, + {"57", "80"}, + {int8(), int8()}, + "1", + boolean()}, + {{kSubstraitComparisonFunctionsUri, "lt"}, + {"57", "57"}, + {int8(), int8()}, + "0", + boolean()}, + {{kSubstraitComparisonFunctionsUri, "gt"}, + {"57", "30"}, + {int8(), int8()}, + "1", + boolean()}, + {{kSubstraitComparisonFunctionsUri, "gt"}, + {"57", "57"}, + {int8(), int8()}, + "0", + boolean()}, + {{kSubstraitComparisonFunctionsUri, "lte"}, + {"57", "57"}, + {int8(), int8()}, + "1", + boolean()}, + {{kSubstraitComparisonFunctionsUri, "lte"}, + {"50", "57"}, + {int8(), int8()}, + "1", + boolean()}, + {{kSubstraitComparisonFunctionsUri, "gte"}, + {"57", "57"}, + {int8(), int8()}, + "1", + boolean()}, + {{kSubstraitComparisonFunctionsUri, "gte"}, + {"60", "57"}, + {int8(), int8()}, + "1", + boolean()}, + {{kSubstraitDatetimeFunctionsUri, "extract"}, + {"YEAR", "2022-07-15T14:33:14"}, + {nullptr, timestamp(TimeUnit::MICRO)}, + "2022", + int64()}, + {{kSubstraitDatetimeFunctionsUri, "extract"}, + {"MONTH", "2022-07-15T14:33:14"}, + {nullptr, timestamp(TimeUnit::MICRO)}, + "7", + int64()}, + {{kSubstraitDatetimeFunctionsUri, "extract"}, + {"DAY", "2022-07-15T14:33:14"}, + {nullptr, timestamp(TimeUnit::MICRO)}, + "15", + int64()}, + {{kSubstraitDatetimeFunctionsUri, "extract"}, + {"SECOND", "2022-07-15T14:33:14"}, + {nullptr, timestamp(TimeUnit::MICRO)}, + "14", + int64()}, + {{kSubstraitDatetimeFunctionsUri, "extract"}, + {"YEAR", "2022-07-15T14:33:14Z"}, + {nullptr, timestamp(TimeUnit::MICRO, "UTC")}, + "2022", + int64()}, + {{kSubstraitDatetimeFunctionsUri, "extract"}, + {"MONTH", "2022-07-15T14:33:14Z"}, + {nullptr, timestamp(TimeUnit::MICRO, "UTC")}, + "7", + int64()}, + {{kSubstraitDatetimeFunctionsUri, "extract"}, + {"DAY", "2022-07-15T14:33:14Z"}, + {nullptr, timestamp(TimeUnit::MICRO, "UTC")}, + "15", + int64()}, + {{kSubstraitDatetimeFunctionsUri, "extract"}, + {"SECOND", "2022-07-15T14:33:14Z"}, + {nullptr, timestamp(TimeUnit::MICRO, "UTC")}, + 
"14", + int64()}, + {{kSubstraitDatetimeFunctionsUri, "lt"}, + {"2022-07-15T14:33:14", "2022-07-15T14:33:20"}, + {timestamp(TimeUnit::MICRO), timestamp(TimeUnit::MICRO)}, + "1", + boolean()}, + {{kSubstraitDatetimeFunctionsUri, "lte"}, + {"2022-07-15T14:33:14", "2022-07-15T14:33:14"}, + {timestamp(TimeUnit::MICRO), timestamp(TimeUnit::MICRO)}, + "1", + boolean()}, + {{kSubstraitDatetimeFunctionsUri, "gt"}, + {"2022-07-15T14:33:30", "2022-07-15T14:33:14"}, + {timestamp(TimeUnit::MICRO), timestamp(TimeUnit::MICRO)}, + "1", + boolean()}, + {{kSubstraitDatetimeFunctionsUri, "gte"}, + {"2022-07-15T14:33:14", "2022-07-15T14:33:14"}, + {timestamp(TimeUnit::MICRO), timestamp(TimeUnit::MICRO)}, + "1", + boolean()}, + {{kSubstraitStringFunctionsUri, "concat"}, + {"abc", "def"}, + {utf8(), utf8()}, + "abcdef", + utf8()}}; + CheckValidTestCases(valid_test_cases); +} + +TEST(FunctionMapping, ErrorCases) { + const std::vector error_test_cases = { + {{kSubstraitArithmeticFunctionsUri, "add"}, + {"ERROR", "127", "10"}, + {nullptr, int8(), int8()}, + "", + int8()}, + {{kSubstraitArithmeticFunctionsUri, "subtract"}, + {"ERROR", "-119", "10"}, + {nullptr, int8(), int8()}, + "", + int8()}, + {{kSubstraitArithmeticFunctionsUri, "multiply"}, + {"ERROR", "10", "13"}, + {nullptr, int8(), int8()}, + "", + int8()}, + {{kSubstraitArithmeticFunctionsUri, "divide"}, + {"ERROR", "-128", "-1"}, + {nullptr, int8(), int8()}, + "", + int8()}}; + CheckErrorTestCases(error_test_cases); +} + +// For each aggregate test case we take in three values. We compute the +// aggregate both on the entire set (all three values) and on groups. The +// first two rows will be in the first group and the last row will be in the +// second group. It's important to test both for coverage since the arrow +// function used actually changes when group ids are present +struct AggregateTestCase { + // The substrait function id + Id function_id; + // The three values, as a JSON string + std::string arguments; + // The data type of the three values + std::shared_ptr data_type; + // The result of the aggregate on all three + std::string combined_output; + // The result of the aggregate on each group (i.e. the first two rows + // and the last row). Should be a json-encoded array of size 2 + std::string group_outputs; + // The data type of the outputs + std::shared_ptr output_type; +}; + +std::shared_ptr
<Table> GetInputTableForAggregateCase(const AggregateTestCase& test_case) { + std::vector<std::shared_ptr<Array>> columns(2); + std::vector<std::shared_ptr<Field>> fields(2); + columns[0] = ArrayFromJSON(int8(), "[1, 1, 2]"); + columns[1] = ArrayFromJSON(test_case.data_type, test_case.arguments); + fields[0] = field("key", int8()); + fields[1] = field("value", test_case.data_type); + std::shared_ptr<RecordBatch> batch = + RecordBatch::Make(schema(std::move(fields)), /*num_rows=*/3, std::move(columns)); + EXPECT_OK_AND_ASSIGN(std::shared_ptr<Table>
table, Table::FromRecordBatches({batch})); + return table; +} + +std::shared_ptr<Table>
GetOutputTableForAggregateCase( + const std::shared_ptr<DataType>& output_type, const std::string& json_data) { + std::shared_ptr<Array> out_arr = ArrayFromJSON(output_type, json_data); + std::shared_ptr<RecordBatch> batch = + RecordBatch::Make(schema({field("", output_type)}), 1, {out_arr}); + EXPECT_OK_AND_ASSIGN(std::shared_ptr<Table>
table, Table::FromRecordBatches({batch})); + return table; +} + +std::shared_ptr<compute::ExecPlan> PlanFromAggregateCase( + const AggregateTestCase& test_case, std::shared_ptr<Table>
* output_table, + bool with_keys) { + std::shared_ptr<Table>
input_table = GetInputTableForAggregateCase(test_case); + std::vector key_idxs = {}; + if (with_keys) { + key_idxs = {0}; + } + EXPECT_OK_AND_ASSIGN( + std::shared_ptr substrait, + internal::CreateScanAggSubstrait(test_case.function_id, input_table, key_idxs, + /*arg_idx=*/1, *test_case.output_type)); + std::shared_ptr consumer = + std::make_shared(output_table, + default_memory_pool()); + + // Mock table provider that ignores the table name and returns input_table + NamedTableProvider table_provider = [input_table](const std::vector&) { + std::shared_ptr options = + std::make_shared(input_table); + return compute::Declaration("table_source", {}, options, "mock_source"); + }; + + ConversionOptions conversion_options; + conversion_options.named_table_provider = std::move(table_provider); + + EXPECT_OK_AND_ASSIGN( + std::shared_ptr plan, + DeserializePlan(*substrait, std::move(consumer), default_extension_id_registry(), + /*ext_set_out=*/nullptr, conversion_options)); + return plan; +} + +void CheckWholeAggregateCase(const AggregateTestCase& test_case) { + std::shared_ptr
<Table> output_table; + std::shared_ptr<compute::ExecPlan> plan = + PlanFromAggregateCase(test_case, &output_table, /*with_keys=*/false); + + ASSERT_OK(plan->StartProducing()); + ASSERT_FINISHES_OK(plan->finished()); + + ASSERT_OK_AND_ASSIGN(output_table, + output_table->SelectColumns({output_table->num_columns() - 1})); + + std::shared_ptr<Table>
expected_output = + GetOutputTableForAggregateCase(test_case.output_type, test_case.combined_output); + AssertTablesEqual(*expected_output, *output_table, /*same_chunk_layout=*/false); +} + +void CheckGroupedAggregateCase(const AggregateTestCase& test_case) { + std::shared_ptr<Table>
<Table> output_table; + std::shared_ptr<compute::ExecPlan> plan = + PlanFromAggregateCase(test_case, &output_table, /*with_keys=*/true); + + ASSERT_OK(plan->StartProducing()); + ASSERT_FINISHES_OK(plan->finished()); + + // The aggregate node's output is unpredictable so we sort by the key column + ASSERT_OK_AND_ASSIGN( + std::shared_ptr<Array> sort_indices, + compute::SortIndices(output_table, compute::SortOptions({compute::SortKey( + output_table->num_columns() - 1, + compute::SortOrder::Ascending)}))); + ASSERT_OK_AND_ASSIGN(Datum sorted_table_datum, + compute::Take(output_table, sort_indices)); + output_table = sorted_table_datum.table(); + // TODO(ARROW-17245) We should be selecting N-1 here but Acero + // currently emits things in reverse order + ASSERT_OK_AND_ASSIGN(output_table, output_table->SelectColumns({0})); + + std::shared_ptr<Table>
expected_output = + GetOutputTableForAggregateCase(test_case.output_type, test_case.group_outputs); + + AssertTablesEqual(*expected_output, *output_table, /*same_chunk_layout=*/false); +} + +void CheckAggregateCases(const std::vector& test_cases) { + for (const AggregateTestCase& test_case : test_cases) { + CheckWholeAggregateCase(test_case); + CheckGroupedAggregateCase(test_case); + } +} + +TEST(FunctionMapping, AggregateCases) { + const std::vector test_cases = { + {{kSubstraitArithmeticFunctionsUri, "sum"}, + "[1, 2, 3]", + int8(), + "[6]", + "[3, 3]", + int64()}, + {{kSubstraitArithmeticFunctionsUri, "min"}, + "[1, 2, 3]", + int8(), + "[1]", + "[1, 3]", + int8()}, + {{kSubstraitArithmeticFunctionsUri, "max"}, + "[1, 2, 3]", + int8(), + "[3]", + "[2, 3]", + int8()}, + {{kSubstraitArithmeticFunctionsUri, "avg"}, + "[1, 2, 3]", + float64(), + "[2]", + "[1.5, 3]", + float64()}}; + CheckAggregateCases(test_cases); +} + +} // namespace engine +} // namespace arrow diff --git a/cpp/src/arrow/engine/substrait/options.h b/cpp/src/arrow/engine/substrait/options.h index dcb2088416f69..eace200f0acb1 100644 --- a/cpp/src/arrow/engine/substrait/options.h +++ b/cpp/src/arrow/engine/substrait/options.h @@ -54,11 +54,20 @@ enum class ConversionStrictness { BEST_EFFORT, }; +using NamedTableProvider = + std::function(const std::vector&)>; +static NamedTableProvider kDefaultNamedTableProvider; + /// Options that control the conversion between Substrait and Acero representations of a /// plan. struct ConversionOptions { /// \brief How strictly the converter should adhere to the structure of the input. ConversionStrictness strictness = ConversionStrictness::BEST_EFFORT; + /// \brief A custom strategy to be used for providing named tables + /// + /// The default behavior will return an invalid status if the plan has any + /// named table relations. + NamedTableProvider named_table_provider = kDefaultNamedTableProvider; }; } // namespace engine diff --git a/cpp/src/arrow/engine/substrait/plan_internal.cc b/cpp/src/arrow/engine/substrait/plan_internal.cc index 2da037000cf70..b0fdb9bdc2fcd 100644 --- a/cpp/src/arrow/engine/substrait/plan_internal.cc +++ b/cpp/src/arrow/engine/substrait/plan_internal.cc @@ -74,13 +74,12 @@ Status AddExtensionSetToPlan(const ExtensionSet& ext_set, substrait::Plan* plan) } for (uint32_t anchor = 0; anchor < ext_set.num_functions(); ++anchor) { - ARROW_ASSIGN_OR_RAISE(auto function_record, ext_set.DecodeFunction(anchor)); - if (function_record.id.empty()) continue; + ARROW_ASSIGN_OR_RAISE(Id function_id, ext_set.DecodeFunction(anchor)); auto fn = internal::make_unique(); - fn->set_extension_uri_reference(map[function_record.id.uri]); + fn->set_extension_uri_reference(map[function_id.uri]); fn->set_function_anchor(anchor); - fn->set_name(function_record.id.name.to_string()); + fn->set_name(function_id.name.to_string()); auto ext_decl = internal::make_unique(); ext_decl->set_allocated_extension_function(fn.release()); @@ -104,8 +103,6 @@ Result GetExtensionSetFromPlan(const substrait::Plan& plan, // NOTE: it's acceptable to use views to memory owned by plan; ExtensionSet::Make // will only store views to memory owned by registry. 
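Aside (not part of the patch): the `named_table_provider` hook added to ConversionOptions above is what a caller supplies so that Substrait named-table references can be resolved to Acero declarations; the mock provider in the aggregate tests earlier does exactly this. A minimal sketch of such a provider backed by an in-memory name-to-Table map follows. The helper name, the label string, and the header paths are assumptions based on this source tree; only the last path segment of the table name is consulted.

// Sketch only: resolve Substrait named tables from an in-memory map.
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "arrow/compute/exec/exec_plan.h"
#include "arrow/compute/exec/options.h"
#include "arrow/engine/substrait/options.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/table.h"

arrow::engine::ConversionOptions MakeInMemoryTableOptions(
    std::unordered_map<std::string, std::shared_ptr<arrow::Table>> tables) {
  arrow::engine::ConversionOptions conversion_options;
  conversion_options.named_table_provider =
      [tables = std::move(tables)](const std::vector<std::string>& names)
      -> arrow::Result<arrow::compute::Declaration> {
    if (names.empty()) {
      return arrow::Status::Invalid("named table reference without a name");
    }
    // Only the last path segment is used for the lookup in this sketch.
    auto it = tables.find(names.back());
    if (it == tables.end()) {
      return arrow::Status::Invalid("no table registered for ", names.back());
    }
    auto source_options =
        std::make_shared<arrow::compute::TableSourceNodeOptions>(it->second);
    // Mirrors the "table_source" declaration used by the tests above.
    return arrow::compute::Declaration("table_source", {}, std::move(source_options),
                                       "named_table_source");
  };
  return conversion_options;
}

The resulting ConversionOptions can then be passed to DeserializePlan, as done in PlanFromAggregateCase above.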
- using Id = ExtensionSet::Id; - std::unordered_map type_ids, function_ids; for (const auto& ext : plan.extensions()) { switch (ext.mapping_type_case()) { diff --git a/cpp/src/arrow/engine/substrait/relation_internal.cc b/cpp/src/arrow/engine/substrait/relation_internal.cc index 8cc1da4d9030a..c5c02f51558c9 100644 --- a/cpp/src/arrow/engine/substrait/relation_internal.cc +++ b/cpp/src/arrow/engine/substrait/relation_internal.cc @@ -67,6 +67,7 @@ Result FromProto(const substrait::Rel& rel, const ExtensionSet& ARROW_ASSIGN_OR_RAISE(auto base_schema, FromProto(read.base_schema(), ext_set, conversion_options)); + auto num_columns = static_cast(base_schema->fields().size()); auto scan_options = std::make_shared(); scan_options->use_threads = true; @@ -82,6 +83,22 @@ Result FromProto(const substrait::Rel& rel, const ExtensionSet& return Status::NotImplemented("substrait::ReadRel::projection"); } + if (read.has_named_table()) { + if (!conversion_options.named_table_provider) { + return Status::Invalid( + "plan contained a named table but a NamedTableProvider has not been " + "configured"); + } + const NamedTableProvider& named_table_provider = + conversion_options.named_table_provider; + const substrait::ReadRel::NamedTable& named_table = read.named_table(); + std::vector table_names(named_table.names().begin(), + named_table.names().end()); + ARROW_ASSIGN_OR_RAISE(compute::Declaration source_decl, + named_table_provider(table_names)); + return DeclarationInfo{std::move(source_decl), num_columns}; + } + if (!read.has_local_files()) { return Status::NotImplemented( "substrait::ReadRel with read_type other than LocalFiles"); @@ -182,7 +199,6 @@ Result FromProto(const substrait::Rel& rel, const ExtensionSet& std::move(filesystem), std::move(files), std::move(format), {})); - auto num_columns = static_cast(base_schema->fields().size()); ARROW_ASSIGN_OR_RAISE(auto ds, ds_factory->Finish(std::move(base_schema))); return DeclarationInfo{ @@ -349,17 +365,20 @@ Result FromProto(const substrait::Rel& rel, const ExtensionSet& "than one item"); } std::vector keys; - auto group = aggregate.groupings(0); - keys.reserve(group.grouping_expressions_size()); - for (int exp_id = 0; exp_id < group.grouping_expressions_size(); exp_id++) { - ARROW_ASSIGN_OR_RAISE(auto expr, FromProto(group.grouping_expressions(exp_id), - ext_set, conversion_options)); - const auto* field_ref = expr.field_ref(); - if (field_ref) { - keys.emplace_back(std::move(*field_ref)); - } else { - return Status::Invalid( - "The grouping expression for an aggregate must be a direct reference."); + if (aggregate.groupings_size() > 0) { + const substrait::AggregateRel::Grouping& group = aggregate.groupings(0); + keys.reserve(group.grouping_expressions_size()); + for (int exp_id = 0; exp_id < group.grouping_expressions_size(); exp_id++) { + ARROW_ASSIGN_OR_RAISE( + compute::Expression expr, + FromProto(group.grouping_expressions(exp_id), ext_set, conversion_options)); + const FieldRef* field_ref = expr.field_ref(); + if (field_ref) { + keys.emplace_back(std::move(*field_ref)); + } else { + return Status::Invalid( + "The grouping expression for an aggregate must be a direct reference."); + } } } @@ -373,25 +392,14 @@ Result FromProto(const substrait::Rel& rel, const ExtensionSet& return Status::NotImplemented("Aggregate filters are not supported."); } const auto& agg_func = agg_measure.measure(); - if (agg_func.arguments_size() != 1) { - return Status::NotImplemented("Aggregate function must be a unary function."); - } - int func_reference = 
agg_func.function_reference(); - ARROW_ASSIGN_OR_RAISE(auto func_record, ext_set.DecodeFunction(func_reference)); - // aggreagte function name - auto func_name = std::string(func_record.id.name); - // aggregate target - auto subs_func_args = agg_func.arguments(0); - ARROW_ASSIGN_OR_RAISE(auto field_expr, FromProto(subs_func_args.value(), - ext_set, conversion_options)); - auto target = field_expr.field_ref(); - if (!target) { - return Status::Invalid( - "The input expression to an aggregate function must be a direct " - "reference."); - } - aggregates.emplace_back(compute::Aggregate{std::move(func_name), NULLPTR, - std::move(*target), std::move("")}); + ARROW_ASSIGN_OR_RAISE( + SubstraitCall aggregate_call, + FromProto(agg_func, !keys.empty(), ext_set, conversion_options)); + ARROW_ASSIGN_OR_RAISE( + ExtensionIdRegistry::SubstraitAggregateToArrow converter, + ext_set.registry()->GetSubstraitAggregateToArrow(aggregate_call.id())); + ARROW_ASSIGN_OR_RAISE(compute::Aggregate arrow_agg, converter(aggregate_call)); + aggregates.push_back(std::move(arrow_agg)); } else { return Status::Invalid("substrait::AggregateFunction not provided"); } diff --git a/cpp/src/arrow/engine/substrait/serde.cc b/cpp/src/arrow/engine/substrait/serde.cc index 87ad88dccb45c..9f7d979e2f02e 100644 --- a/cpp/src/arrow/engine/substrait/serde.cc +++ b/cpp/src/arrow/engine/substrait/serde.cc @@ -172,7 +172,7 @@ Result> MakeSingleDeclarationPlan( } else { ARROW_ASSIGN_OR_RAISE(auto plan, compute::ExecPlan::Make()); ARROW_RETURN_NOT_OK(declarations[0].AddToPlan(plan.get())); - return plan; + return std::move(plan); } } @@ -182,17 +182,21 @@ Result> DeserializePlan( const Buffer& buf, const std::shared_ptr& consumer, const ExtensionIdRegistry* registry, ExtensionSet* ext_set_out, const ConversionOptions& conversion_options) { - bool factory_done = false; - auto single_consumer = [&factory_done, &consumer] { - if (factory_done) { - return std::shared_ptr{}; + struct SingleConsumer { + std::shared_ptr operator()() { + if (factory_done) { + Status::Invalid("SingleConsumer invoked more than once").Warn(); + return std::shared_ptr{}; + } + factory_done = true; + return consumer; } - factory_done = true; - return consumer; + bool factory_done; + std::shared_ptr consumer; }; - ARROW_ASSIGN_OR_RAISE( - auto declarations, - DeserializePlans(buf, single_consumer, registry, ext_set_out, conversion_options)); + ARROW_ASSIGN_OR_RAISE(auto declarations, + DeserializePlans(buf, SingleConsumer{false, consumer}, registry, + ext_set_out, conversion_options)); return MakeSingleDeclarationPlan(declarations); } diff --git a/cpp/src/arrow/engine/substrait/serde.h b/cpp/src/arrow/engine/substrait/serde.h index 5214606e1c8c7..6c2083fb56a15 100644 --- a/cpp/src/arrow/engine/substrait/serde.h +++ b/cpp/src/arrow/engine/substrait/serde.h @@ -75,7 +75,7 @@ ARROW_ENGINE_EXPORT Result> DeserializePlans( /// Plan is returned here. 
/// \return an ExecNode corresponding to the single toplevel relation in the Substrait /// Plan -Result> DeserializePlan( +ARROW_ENGINE_EXPORT Result> DeserializePlan( const Buffer& buf, const std::shared_ptr& consumer, const ExtensionIdRegistry* registry = NULLPTR, ExtensionSet* ext_set_out = NULLPTR, const ConversionOptions& conversion_options = {}); diff --git a/cpp/src/arrow/engine/substrait/serde_test.cc b/cpp/src/arrow/engine/substrait/serde_test.cc index 3bb4de4e920a0..04405b316807d 100644 --- a/cpp/src/arrow/engine/substrait/serde_test.cc +++ b/cpp/src/arrow/engine/substrait/serde_test.cc @@ -701,7 +701,12 @@ TEST(Substrait, ExtensionSetFromPlan) { "extension_uris": [ { "extension_uri_anchor": 7, - "uri": ")" + substrait::default_extension_types_uri() + + "uri": ")" + default_extension_types_uri() + + R"(" + }, + { + "extension_uri_anchor": 18, + "uri": ")" + kSubstraitArithmeticFunctionsUri + R"(" } ], @@ -712,15 +717,15 @@ TEST(Substrait, ExtensionSetFromPlan) { "name": "null" }}, {"extension_function": { - "extension_uri_reference": 7, + "extension_uri_reference": 18, "function_anchor": 42, "name": "add" }} ] - })"; +})"; ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Plan", substrait_json)); for (auto sp_ext_id_reg : - {std::shared_ptr(), substrait::MakeExtensionIdRegistry()}) { + {std::shared_ptr(), MakeExtensionIdRegistry()}) { ExtensionIdRegistry* ext_id_reg = sp_ext_id_reg.get(); ExtensionSet ext_set(ext_id_reg); ASSERT_OK_AND_ASSIGN(auto sink_decls, @@ -732,10 +737,9 @@ TEST(Substrait, ExtensionSetFromPlan) { EXPECT_EQ(decoded_null_type.id.name, "null"); EXPECT_EQ(*decoded_null_type.type, NullType()); - EXPECT_OK_AND_ASSIGN(auto decoded_add_func, ext_set.DecodeFunction(42)); - EXPECT_EQ(decoded_add_func.id.uri, kArrowExtTypesUri); - EXPECT_EQ(decoded_add_func.id.name, "add"); - EXPECT_EQ(decoded_add_func.name, "add"); + EXPECT_OK_AND_ASSIGN(Id decoded_add_func_id, ext_set.DecodeFunction(42)); + EXPECT_EQ(decoded_add_func_id.uri, kSubstraitArithmeticFunctionsUri); + EXPECT_EQ(decoded_add_func_id.name, "add"); } } @@ -745,7 +749,7 @@ TEST(Substrait, ExtensionSetFromPlanMissingFunc) { "extension_uris": [ { "extension_uri_anchor": 7, - "uri": ")" + substrait::default_extension_types_uri() + + "uri": ")" + default_extension_types_uri() + R"(" } ], @@ -760,7 +764,7 @@ TEST(Substrait, ExtensionSetFromPlanMissingFunc) { ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Plan", substrait_json)); for (auto sp_ext_id_reg : - {std::shared_ptr(), substrait::MakeExtensionIdRegistry()}) { + {std::shared_ptr(), MakeExtensionIdRegistry()}) { ExtensionIdRegistry* ext_id_reg = sp_ext_id_reg.get(); ExtensionSet ext_set(ext_id_reg); ASSERT_RAISES(Invalid, DeserializePlans( @@ -786,7 +790,7 @@ TEST(Substrait, ExtensionSetFromPlanExhaustedFactory) { "extension_uris": [ { "extension_uri_anchor": 7, - "uri": ")" + substrait::default_extension_types_uri() + + "uri": ")" + default_extension_types_uri() + R"(" } ], @@ -801,7 +805,7 @@ TEST(Substrait, ExtensionSetFromPlanExhaustedFactory) { ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Plan", substrait_json)); for (auto sp_ext_id_reg : - {std::shared_ptr(), substrait::MakeExtensionIdRegistry()}) { + {std::shared_ptr(), MakeExtensionIdRegistry()}) { ExtensionIdRegistry* ext_id_reg = sp_ext_id_reg.get(); ExtensionSet ext_set(ext_id_reg); ASSERT_RAISES( @@ -823,7 +827,7 @@ TEST(Substrait, ExtensionSetFromPlanRegisterFunc) { "extension_uris": [ { "extension_uri_anchor": 7, - "uri": ")" + 
substrait::default_extension_types_uri() + + "uri": ")" + default_extension_types_uri() + R"(" } ], @@ -837,24 +841,23 @@ TEST(Substrait, ExtensionSetFromPlanRegisterFunc) { })"; ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Plan", substrait_json)); - auto sp_ext_id_reg = substrait::MakeExtensionIdRegistry(); + auto sp_ext_id_reg = MakeExtensionIdRegistry(); ExtensionIdRegistry* ext_id_reg = sp_ext_id_reg.get(); // invalid before registration ExtensionSet ext_set_invalid(ext_id_reg); ASSERT_RAISES(Invalid, DeserializePlans( *buf, [] { return kNullConsumer; }, ext_id_reg, &ext_set_invalid)); - ASSERT_OK(substrait::RegisterFunction( - *ext_id_reg, substrait::default_extension_types_uri(), "new_func", "multiply")); + ASSERT_OK(ext_id_reg->AddSubstraitCallToArrow( + {default_extension_types_uri(), "new_func"}, "multiply")); // valid after registration ExtensionSet ext_set_valid(ext_id_reg); ASSERT_OK_AND_ASSIGN(auto sink_decls, DeserializePlans( *buf, [] { return kNullConsumer; }, ext_id_reg, &ext_set_valid)); - EXPECT_OK_AND_ASSIGN(auto decoded_add_func, ext_set_valid.DecodeFunction(42)); - EXPECT_EQ(decoded_add_func.id.uri, kArrowExtTypesUri); - EXPECT_EQ(decoded_add_func.id.name, "new_func"); - EXPECT_EQ(decoded_add_func.name, "multiply"); + EXPECT_OK_AND_ASSIGN(Id decoded_add_func_id, ext_set_valid.DecodeFunction(42)); + EXPECT_EQ(decoded_add_func_id.uri, kArrowExtTypesUri); + EXPECT_EQ(decoded_add_func_id.name, "new_func"); } Result GetSubstraitJSON() { @@ -900,7 +903,7 @@ TEST(Substrait, DeserializeWithConsumerFactory) { GTEST_SKIP() << "ARROW-16392: Substrait File URI not supported for Windows"; #else ASSERT_OK_AND_ASSIGN(std::string substrait_json, GetSubstraitJSON()); - ASSERT_OK_AND_ASSIGN(auto buf, substrait::SerializeJsonPlan(substrait_json)); + ASSERT_OK_AND_ASSIGN(auto buf, SerializeJsonPlan(substrait_json)); ASSERT_OK_AND_ASSIGN(auto declarations, DeserializePlans(*buf, NullSinkNodeConsumer::Make)); ASSERT_EQ(declarations.size(), 1); @@ -923,7 +926,7 @@ TEST(Substrait, DeserializeSinglePlanWithConsumerFactory) { GTEST_SKIP() << "ARROW-16392: Substrait File URI not supported for Windows"; #else ASSERT_OK_AND_ASSIGN(std::string substrait_json, GetSubstraitJSON()); - ASSERT_OK_AND_ASSIGN(auto buf, substrait::SerializeJsonPlan(substrait_json)); + ASSERT_OK_AND_ASSIGN(auto buf, SerializeJsonPlan(substrait_json)); ASSERT_OK_AND_ASSIGN(std::shared_ptr plan, DeserializePlan(*buf, NullSinkNodeConsumer::Make())); ASSERT_EQ(1, plan->sinks().size()); @@ -960,7 +963,7 @@ TEST(Substrait, DeserializeWithWriteOptionsFactory) { return std::make_shared(options); }; ASSERT_OK_AND_ASSIGN(std::string substrait_json, GetSubstraitJSON()); - ASSERT_OK_AND_ASSIGN(auto buf, substrait::SerializeJsonPlan(substrait_json)); + ASSERT_OK_AND_ASSIGN(auto buf, SerializeJsonPlan(substrait_json)); ASSERT_OK_AND_ASSIGN(auto declarations, DeserializePlans(*buf, write_options_factory)); ASSERT_EQ(declarations.size(), 1); compute::Declaration* decl = &declarations[0]; @@ -984,7 +987,7 @@ TEST(Substrait, DeserializeWithWriteOptionsFactory) { static void test_with_registries( std::function test) { auto default_func_reg = compute::GetFunctionRegistry(); - auto nested_ext_id_reg = substrait::MakeExtensionIdRegistry(); + auto nested_ext_id_reg = MakeExtensionIdRegistry(); auto nested_func_reg = compute::FunctionRegistry::Make(default_func_reg); test(nullptr, default_func_reg); test(nullptr, nested_func_reg.get()); @@ -999,8 +1002,8 @@ TEST(Substrait, GetRecordBatchReader) { ASSERT_OK_AND_ASSIGN(std::string 
substrait_json, GetSubstraitJSON()); test_with_registries([&substrait_json](ExtensionIdRegistry* ext_id_reg, compute::FunctionRegistry* func_registry) { - ASSERT_OK_AND_ASSIGN(auto buf, substrait::SerializeJsonPlan(substrait_json)); - ASSERT_OK_AND_ASSIGN(auto reader, substrait::ExecuteSerializedPlan(*buf)); + ASSERT_OK_AND_ASSIGN(auto buf, SerializeJsonPlan(substrait_json)); + ASSERT_OK_AND_ASSIGN(auto reader, ExecuteSerializedPlan(*buf)); ASSERT_OK_AND_ASSIGN(auto table, Table::FromRecordBatchReader(reader.get())); // Note: assuming the binary.parquet file contains fixed amount of records // in case of a test failure, re-evalaute the content in the file @@ -1016,8 +1019,8 @@ TEST(Substrait, InvalidPlan) { })"; test_with_registries([&substrait_json](ExtensionIdRegistry* ext_id_reg, compute::FunctionRegistry* func_registry) { - ASSERT_OK_AND_ASSIGN(auto buf, substrait::SerializeJsonPlan(substrait_json)); - ASSERT_RAISES(Invalid, substrait::ExecuteSerializedPlan(*buf)); + ASSERT_OK_AND_ASSIGN(auto buf, SerializeJsonPlan(substrait_json)); + ASSERT_RAISES(Invalid, ExecuteSerializedPlan(*buf)); }); } @@ -1101,7 +1104,10 @@ TEST(Substrait, JoinPlanBasic) { } } } - }] + }], + "output_type": { + "bool": {} + } } }, "type": "JOIN_TYPE_INNER" @@ -1111,7 +1117,7 @@ TEST(Substrait, JoinPlanBasic) { "extension_uris": [ { "extension_uri_anchor": 0, - "uri": ")" + substrait::default_extension_types_uri() + + "uri": ")" + std::string(kSubstraitComparisonFunctionsUri) + R"(" } ], @@ -1125,7 +1131,7 @@ TEST(Substrait, JoinPlanBasic) { })"; ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Plan", substrait_json)); for (auto sp_ext_id_reg : - {std::shared_ptr(), substrait::MakeExtensionIdRegistry()}) { + {std::shared_ptr(), MakeExtensionIdRegistry()}) { ExtensionIdRegistry* ext_id_reg = sp_ext_id_reg.get(); ExtensionSet ext_set(ext_id_reg); ASSERT_OK_AND_ASSIGN(auto sink_decls, @@ -1241,7 +1247,10 @@ TEST(Substrait, JoinPlanInvalidKeyCmp) { } } } - }] + }], + "output_type": { + "bool": {} + } } }, "type": "JOIN_TYPE_INNER" @@ -1251,7 +1260,7 @@ TEST(Substrait, JoinPlanInvalidKeyCmp) { "extension_uris": [ { "extension_uri_anchor": 0, - "uri": ")" + substrait::default_extension_types_uri() + + "uri": ")" + std::string(kSubstraitArithmeticFunctionsUri) + R"(" } ], @@ -1265,7 +1274,7 @@ TEST(Substrait, JoinPlanInvalidKeyCmp) { })"; ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Plan", substrait_json)); for (auto sp_ext_id_reg : - {std::shared_ptr(), substrait::MakeExtensionIdRegistry()}) { + {std::shared_ptr(), MakeExtensionIdRegistry()}) { ExtensionIdRegistry* ext_id_reg = sp_ext_id_reg.get(); ExtensionSet ext_set(ext_id_reg); ASSERT_RAISES(Invalid, DeserializePlans( @@ -1333,7 +1342,7 @@ TEST(Substrait, JoinPlanInvalidExpression) { }] })")); for (auto sp_ext_id_reg : - {std::shared_ptr(), substrait::MakeExtensionIdRegistry()}) { + {std::shared_ptr(), MakeExtensionIdRegistry()}) { ExtensionIdRegistry* ext_id_reg = sp_ext_id_reg.get(); ExtensionSet ext_set(ext_id_reg); ASSERT_RAISES(Invalid, DeserializePlans( @@ -1406,7 +1415,7 @@ TEST(Substrait, JoinPlanInvalidKeys) { }] })")); for (auto sp_ext_id_reg : - {std::shared_ptr(), substrait::MakeExtensionIdRegistry()}) { + {std::shared_ptr(), MakeExtensionIdRegistry()}) { ExtensionIdRegistry* ext_id_reg = sp_ext_id_reg.get(); ExtensionSet ext_set(ext_id_reg); ASSERT_RAISES(Invalid, DeserializePlans( @@ -1470,6 +1479,7 @@ TEST(Substrait, AggregateBasic) { }], "sorts": [], "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "invocation": 
"AGGREGATION_INVOCATION_ALL", "outputType": { "i64": {} } @@ -1480,18 +1490,18 @@ TEST(Substrait, AggregateBasic) { }], "extensionUris": [{ "extension_uri_anchor": 0, - "uri": "https://github.com/apache/arrow/blob/master/format/substrait/extension_types.yaml" + "uri": "https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml" }], "extensions": [{ "extension_function": { "extension_uri_reference": 0, "function_anchor": 0, - "name": "hash_count" + "name": "sum" } }], })")); - auto sp_ext_id_reg = substrait::MakeExtensionIdRegistry(); + auto sp_ext_id_reg = MakeExtensionIdRegistry(); ASSERT_OK_AND_ASSIGN(auto sink_decls, DeserializePlans(*buf, [] { return kNullConsumer; })); auto agg_decl = sink_decls[0].inputs[0]; @@ -1503,7 +1513,7 @@ TEST(Substrait, AggregateBasic) { EXPECT_EQ(agg_rel->factory_name, "aggregate"); EXPECT_EQ(agg_options.aggregates[0].name, ""); - EXPECT_EQ(agg_options.aggregates[0].function, "hash_count"); + EXPECT_EQ(agg_options.aggregates[0].function, "hash_sum"); } TEST(Substrait, AggregateInvalidRel) { @@ -1516,13 +1526,13 @@ TEST(Substrait, AggregateInvalidRel) { }], "extensionUris": [{ "extension_uri_anchor": 0, - "uri": "https://github.com/apache/arrow/blob/master/format/substrait/extension_types.yaml" + "uri": "https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml" }], "extensions": [{ "extension_function": { "extension_uri_reference": 0, "function_anchor": 0, - "name": "hash_count" + "name": "sum" } }], })")); @@ -1577,13 +1587,13 @@ TEST(Substrait, AggregateInvalidFunction) { }], "extensionUris": [{ "extension_uri_anchor": 0, - "uri": "https://github.com/apache/arrow/blob/master/format/substrait/extension_types.yaml" + "uri": "https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml" }], "extensions": [{ "extension_function": { "extension_uri_reference": 0, "function_anchor": 0, - "name": "hash_count" + "name": "sum" } }], })")); @@ -1637,6 +1647,7 @@ TEST(Substrait, AggregateInvalidAggFuncArgs) { "args": [], "sorts": [], "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "invocation": "AGGREGATION_INVOCATION_ALL", "outputType": { "i64": {} } @@ -1647,13 +1658,13 @@ TEST(Substrait, AggregateInvalidAggFuncArgs) { }], "extensionUris": [{ "extension_uri_anchor": 0, - "uri": "https://github.com/apache/arrow/blob/master/format/substrait/extension_types.yaml" + "uri": "https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml" }], "extensions": [{ "extension_function": { "extension_uri_reference": 0, "function_anchor": 0, - "name": "hash_count" + "name": "sum" } }], })")); @@ -1707,6 +1718,78 @@ TEST(Substrait, AggregateWithFilter) { "args": [], "sorts": [], "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "invocation": "AGGREGATION_INVOCATION_ALL", + "outputType": { + "i64": {} + } + } + }] + } + } + }], + "extensionUris": [{ + "extension_uri_anchor": 0, + "uri": "https://github.com/apache/arrow/blob/master/format/substrait/extension_types.yaml" + }], + "extensions": [{ + "extension_function": { + "extension_uri_reference": 0, + "function_anchor": 0, + "name": "equal" + } + }], + })")); + + ASSERT_RAISES(NotImplemented, DeserializePlans(*buf, [] { return kNullConsumer; })); +} + +TEST(Substrait, AggregateBadPhase) { + ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Plan", R"({ + "relations": [{ + "rel": { + "aggregate": { + "input": { + "read": { + "base_schema": { + "names": ["A", "B", "C"], + "struct": { + "types": [{ + "i32": 
{} + }, { + "i32": {} + }, { + "i32": {} + }] + } + }, + "local_files": { + "items": [ + { + "uri_file": "file:///tmp/dat.parquet", + "parquet": {} + } + ] + } + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 0, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "invocation": "AGGREGATION_INVOCATION_DISTINCT", "outputType": { "i64": {} } diff --git a/cpp/src/arrow/engine/substrait/test_plan_builder.cc b/cpp/src/arrow/engine/substrait/test_plan_builder.cc new file mode 100644 index 0000000000000..3bd373ae5fa56 --- /dev/null +++ b/cpp/src/arrow/engine/substrait/test_plan_builder.cc @@ -0,0 +1,216 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/engine/substrait/test_plan_builder.h" + +#include + +#include "arrow/compute/exec/exec_plan.h" +#include "arrow/engine/substrait/plan_internal.h" +#include "arrow/engine/substrait/type_internal.h" +#include "arrow/util/macros.h" +#include "arrow/util/make_unique.h" + +#include "substrait/algebra.pb.h" +#include "substrait/plan.pb.h" +#include "substrait/type.pb.h" + +namespace arrow { + +using internal::make_unique; + +namespace engine { +namespace internal { + +static const ConversionOptions kPlanBuilderConversionOptions; + +Result> CreateRead(const Table& table, + ExtensionSet* ext_set) { + auto read = make_unique(); + + ARROW_ASSIGN_OR_RAISE(std::unique_ptr schema, + ToProto(*table.schema(), ext_set, kPlanBuilderConversionOptions)); + read->set_allocated_base_schema(schema.release()); + + auto named_table = make_unique(); + named_table->add_names("test"); + read->set_allocated_named_table(named_table.release()); + + return read; +} + +void CreateDirectReference(int32_t index, substrait::Expression* expr) { + auto reference = make_unique(); + auto reference_segment = make_unique(); + auto struct_field = make_unique(); + struct_field->set_field(index); + reference_segment->set_allocated_struct_field(struct_field.release()); + reference->set_allocated_direct_reference(reference_segment.release()); + + auto root_reference = + make_unique(); + reference->set_allocated_root_reference(root_reference.release()); + expr->set_allocated_selection(reference.release()); +} + +Result> CreateProject( + Id function_id, const std::vector& arguments, + const std::vector>& arg_types, const DataType& output_type, + ExtensionSet* ext_set) { + auto project = make_unique(); + + auto call = make_unique(); + ARROW_ASSIGN_OR_RAISE(uint32_t function_anchor, ext_set->EncodeFunction(function_id)); + call->set_function_reference(function_anchor); + + std::size_t arg_index = 0; + std::size_t table_arg_index = 0; + for (const 
std::shared_ptr& arg_type : arg_types) { + substrait::FunctionArgument* argument = call->add_arguments(); + if (arg_type) { + // If it has a type then it's a reference to the input table + auto expression = make_unique(); + CreateDirectReference(static_cast(table_arg_index++), expression.get()); + argument->set_allocated_value(expression.release()); + } else { + // If it doesn't have a type then it's an enum + const std::string& enum_value = arguments[arg_index]; + auto enum_ = make_unique(); + if (enum_value.size() > 0) { + enum_->set_specified(enum_value); + } else { + auto unspecified = make_unique(); + enum_->set_allocated_unspecified(unspecified.release()); + } + argument->set_allocated_enum_(enum_.release()); + } + arg_index++; + } + + ARROW_ASSIGN_OR_RAISE( + std::unique_ptr output_type_substrait, + ToProto(output_type, /*nullable=*/true, ext_set, kPlanBuilderConversionOptions)); + call->set_allocated_output_type(output_type_substrait.release()); + + substrait::Expression* call_expression = project->add_expressions(); + call_expression->set_allocated_scalar_function(call.release()); + + return project; +} + +Result> CreateAgg(Id function_id, + const std::vector& keys, + int arg_idx, + const DataType& output_type, + ExtensionSet* ext_set) { + auto agg = make_unique(); + + if (!keys.empty()) { + substrait::AggregateRel::Grouping* grouping = agg->add_groupings(); + for (int key : keys) { + substrait::Expression* key_expr = grouping->add_grouping_expressions(); + CreateDirectReference(key, key_expr); + } + } + + substrait::AggregateRel::Measure* measure_wrapper = agg->add_measures(); + auto agg_func = make_unique(); + ARROW_ASSIGN_OR_RAISE(uint32_t function_anchor, ext_set->EncodeFunction(function_id)); + + agg_func->set_function_reference(function_anchor); + + substrait::FunctionArgument* arg = agg_func->add_arguments(); + auto arg_expr = make_unique(); + CreateDirectReference(arg_idx, arg_expr.get()); + arg->set_allocated_value(arg_expr.release()); + + agg_func->set_phase(substrait::AggregationPhase::AGGREGATION_PHASE_INITIAL_TO_RESULT); + agg_func->set_invocation( + substrait::AggregateFunction::AggregationInvocation:: + AggregateFunction_AggregationInvocation_AGGREGATION_INVOCATION_ALL); + + ARROW_ASSIGN_OR_RAISE( + std::unique_ptr output_type_substrait, + ToProto(output_type, /*nullable=*/true, ext_set, kPlanBuilderConversionOptions)); + agg_func->set_allocated_output_type(output_type_substrait.release()); + measure_wrapper->set_allocated_measure(agg_func.release()); + + return agg; +} + +Result> CreatePlan(std::unique_ptr root, + ExtensionSet* ext_set) { + auto plan = make_unique(); + + substrait::PlanRel* plan_rel = plan->add_relations(); + auto rel_root = make_unique(); + rel_root->set_allocated_input(root.release()); + plan_rel->set_allocated_root(rel_root.release()); + + ARROW_RETURN_NOT_OK(AddExtensionSetToPlan(*ext_set, plan.get())); + return plan; +} + +Result> CreateScanProjectSubstrait( + Id function_id, const std::shared_ptr
& input_table, + const std::vector& arguments, + const std::vector>& data_types, + const DataType& output_type) { + ExtensionSet ext_set; + ARROW_ASSIGN_OR_RAISE(std::unique_ptr read, + CreateRead(*input_table, &ext_set)); + ARROW_ASSIGN_OR_RAISE( + std::unique_ptr project, + CreateProject(function_id, arguments, data_types, output_type, &ext_set)); + + auto read_rel = make_unique(); + read_rel->set_allocated_read(read.release()); + project->set_allocated_input(read_rel.release()); + + auto project_rel = make_unique(); + project_rel->set_allocated_project(project.release()); + + ARROW_ASSIGN_OR_RAISE(std::unique_ptr plan, + CreatePlan(std::move(project_rel), &ext_set)); + return Buffer::FromString(plan->SerializeAsString()); +} + +Result> CreateScanAggSubstrait( + Id function_id, const std::shared_ptr
& input_table, + const std::vector& key_idxs, int arg_idx, const DataType& output_type) { + ExtensionSet ext_set; + + ARROW_ASSIGN_OR_RAISE(std::unique_ptr read, + CreateRead(*input_table, &ext_set)); + ARROW_ASSIGN_OR_RAISE(std::unique_ptr agg, + CreateAgg(function_id, key_idxs, arg_idx, output_type, &ext_set)); + + auto read_rel = make_unique(); + read_rel->set_allocated_read(read.release()); + agg->set_allocated_input(read_rel.release()); + + auto agg_rel = make_unique(); + agg_rel->set_allocated_aggregate(agg.release()); + + ARROW_ASSIGN_OR_RAISE(std::unique_ptr plan, + CreatePlan(std::move(agg_rel), &ext_set)); + return Buffer::FromString(plan->SerializeAsString()); +} + +} // namespace internal +} // namespace engine +} // namespace arrow diff --git a/cpp/src/arrow/engine/substrait/test_plan_builder.h b/cpp/src/arrow/engine/substrait/test_plan_builder.h new file mode 100644 index 0000000000000..9d2d97a8cc9cc --- /dev/null +++ b/cpp/src/arrow/engine/substrait/test_plan_builder.h @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// These utilities are for internal / unit test use only. +// They allow for the construction of simple Substrait plans +// programmatically without first requiring the construction +// of an ExecPlan + +// These utilities have to be here, and not in a test_util.cc +// file (or in a unit test) because only one .so is allowed +// to include each .pb.h file or else protobuf will encounter +// global namespace conflicts. + +#include +#include +#include + +#include "arrow/buffer.h" +#include "arrow/engine/substrait/extension_set.h" +#include "arrow/result.h" +#include "arrow/table.h" +#include "arrow/type.h" + +namespace arrow { +namespace engine { +namespace internal { + +/// \brief Create a scan->project->sink plan for tests +/// +/// The plan will project one additional column using the function +/// defined by `function_id`, `arguments`, and data_types. `arguments` +/// and `data_types` should have the same length but only one of each +/// should be defined at each index. +/// +/// If `data_types` is defined at an index then the plan will create a +/// direct reference (starting at index 0 and increasing by 1 for each +/// argument of this type). +/// +/// If `arguments` is defined at an index then the plan will create an +/// enum argument with that value. +ARROW_ENGINE_EXPORT Result> CreateScanProjectSubstrait( + Id function_id, const std::shared_ptr
& input_table, + const std::vector& arguments, + const std::vector>& data_types, + const DataType& output_type); + +/// \brief Create a scan->aggregate->sink plan for tests +/// +/// The plan will create an aggregate with one grouping set (defined by +/// key_idxs) and one measure. The measure will be a unary function +/// defined by `function_id` and a direct reference to `arg_idx`. +ARROW_ENGINE_EXPORT Result> CreateScanAggSubstrait( + Id function_id, const std::shared_ptr
& input_table, + const std::vector& key_idxs, int arg_idx, const DataType& output_type); + +} // namespace internal +} // namespace engine +} // namespace arrow diff --git a/cpp/src/arrow/engine/substrait/util.cc b/cpp/src/arrow/engine/substrait/util.cc index 36240d468278c..936bde5c652e5 100644 --- a/cpp/src/arrow/engine/substrait/util.cc +++ b/cpp/src/arrow/engine/substrait/util.cc @@ -23,8 +23,6 @@ namespace arrow { namespace engine { -namespace substrait { - namespace { /// \brief A SinkNodeConsumer specialized to output ExecBatches via PushGenerator @@ -136,19 +134,11 @@ std::shared_ptr MakeExtensionIdRegistry() { return nested_extension_id_registry(default_extension_id_registry()); } -Status RegisterFunction(ExtensionIdRegistry& registry, const std::string& id_uri, - const std::string& id_name, - const std::string& arrow_function_name) { - return registry.RegisterFunction(id_uri, id_name, arrow_function_name); -} - const std::string& default_extension_types_uri() { static std::string uri = engine::kArrowExtTypesUri.to_string(); return uri; } -} // namespace substrait - } // namespace engine } // namespace arrow diff --git a/cpp/src/arrow/engine/substrait/util.h b/cpp/src/arrow/engine/substrait/util.h index 134d633bb33d3..3ac9320e1da76 100644 --- a/cpp/src/arrow/engine/substrait/util.h +++ b/cpp/src/arrow/engine/substrait/util.h @@ -27,8 +27,6 @@ namespace arrow { namespace engine { -namespace substrait { - /// \brief Retrieve a RecordBatchReader from a Substrait plan. ARROW_ENGINE_EXPORT Result> ExecuteSerializedPlan( const Buffer& substrait_buffer, const ExtensionIdRegistry* registry = NULLPTR, @@ -43,24 +41,8 @@ ARROW_ENGINE_EXPORT Result> SerializeJsonPlan( /// See arrow::engine::nested_extension_id_registry for details. ARROW_ENGINE_EXPORT std::shared_ptr MakeExtensionIdRegistry(); -/// \brief Register a function manually. -/// -/// Register an arrow function name by an ID, defined by a URI and a name, on a given -/// extension-id-registry. 
-/// -/// \param[in] registry an extension-id-registry to use -/// \param[in] id_uri a URI of the ID to register by -/// \param[in] id_name a name of the ID to register by -/// \param[in] arrow_function_name name of arrow function to register -ARROW_ENGINE_EXPORT Status RegisterFunction(ExtensionIdRegistry& registry, - const std::string& id_uri, - const std::string& id_name, - const std::string& arrow_function_name); - ARROW_ENGINE_EXPORT const std::string& default_extension_types_uri(); -} // namespace substrait - } // namespace engine } // namespace arrow diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt index 1a36949fd629f..6888231a35a6e 100644 --- a/cpp/src/arrow/filesystem/CMakeLists.txt +++ b/cpp/src/arrow/filesystem/CMakeLists.txt @@ -28,6 +28,16 @@ add_arrow_test(filesystem-test EXTRA_LABELS filesystem) +if(ARROW_BUILD_BENCHMARKS) + add_arrow_benchmark(localfs_benchmark + PREFIX + "arrow-filesystem" + SOURCES + localfs_benchmark.cc + STATIC_LINK_LIBS + ${ARROW_BENCHMARK_LINK_LIBS}) +endif() + if(ARROW_GCS) add_arrow_test(gcsfs_test EXTRA_LABELS diff --git a/cpp/src/arrow/filesystem/localfs.cc b/cpp/src/arrow/filesystem/localfs.cc index 889775d7250a3..585131ecc5eb7 100644 --- a/cpp/src/arrow/filesystem/localfs.cc +++ b/cpp/src/arrow/filesystem/localfs.cc @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -29,12 +30,15 @@ #include #endif +#include "arrow/filesystem/filesystem.h" #include "arrow/filesystem/localfs.h" #include "arrow/filesystem/path_util.h" +#include "arrow/filesystem/type_fwd.h" #include "arrow/filesystem/util_internal.h" #include "arrow/io/file.h" +#include "arrow/io/type_fwd.h" +#include "arrow/util/async_generator.h" #include "arrow/util/io_util.h" -#include "arrow/util/logging.h" #include "arrow/util/uri.h" #include "arrow/util/windows_fixup.h" @@ -243,7 +247,8 @@ LocalFileSystemOptions LocalFileSystemOptions::Defaults() { } bool LocalFileSystemOptions::Equals(const LocalFileSystemOptions& other) const { - return use_mmap == other.use_mmap; + return use_mmap == other.use_mmap && directory_readahead == other.directory_readahead && + file_info_batch_size == other.file_info_batch_size; } Result LocalFileSystemOptions::FromUri( @@ -309,6 +314,241 @@ Result> LocalFileSystem::GetFileInfo(const FileSelector& s return results; } +namespace { + +/// Workhorse for streaming async implementation of `GetFileInfo` +/// (`GetFileInfoGenerator`). +/// +/// There are two variants of async discovery functions suported: +/// 1. `DiscoverDirectoryFiles`, which parallelizes traversal of individual directories +/// so that each directory results are yielded as a separate `FileInfoGenerator` via +/// an underlying `DiscoveryImplIterator`, which delivers items in chunks (default size +/// is 1K items). +/// 2. `DiscoverDirectoriesFlattened`, which forwards execution to the +/// `DiscoverDirectoryFiles`, with the difference that the results from individual +/// sub-directory iterators are merged into the single FileInfoGenerator stream. +/// +/// The implementation makes use of additional attributes in `LocalFileSystemOptions`, +/// such as `directory_readahead`, which can be used to tune algorithm +/// behavior and adjust how many directories can be processed in parallel. +/// This option is disabled by default, so that individual directories are processed +/// in serial manner via `MakeConcatenatedGenerator` under the hood. 
+class AsyncStatSelector { + public: + using FileInfoGeneratorProducer = PushGenerator::Producer; + + /// Discovery state, which is shared among all `DiscoveryImplGenerator`:s, + /// spawned by a single discovery operation (`DiscoverDirectoryFiles()`). + /// + /// The sole purpose of this struct is to handle automatic closing the + /// producer side of the resulting `FileInfoGenerator`. I.e. the producer + /// is kept alive until all discovery iterators are exhausted, in which case + /// `producer.Close()` is called automatically when ref-count for the state + /// reaches zero (which is equivalent to finishing the file discovery + /// process). + struct DiscoveryState { + FileInfoGeneratorProducer producer; + + explicit DiscoveryState(FileInfoGeneratorProducer p) : producer(std::move(p)) {} + ~DiscoveryState() { producer.Close(); } + }; + + /// The main procedure to start async streaming discovery using a given `FileSelector`. + /// + /// The result is a two-level generator, i.e. "generator of FileInfoGenerator:s", + /// where each individual generator represents an FileInfo item stream from coming an + /// individual sub-directory under the selector's `base_dir`. + static Result> DiscoverDirectoryFiles( + FileSelector selector, LocalFileSystemOptions fs_opts, + const io::IOContext& io_context) { + PushGenerator file_gen; + + ARROW_ASSIGN_OR_RAISE( + auto base_dir, arrow::internal::PlatformFilename::FromString(selector.base_dir)); + ARROW_RETURN_NOT_OK(DoDiscovery(std::move(base_dir), 0, std::move(selector), + std::make_shared(file_gen.producer()), + io_context, fs_opts.file_info_batch_size)); + + return file_gen; + } + + /// Version of `DiscoverDirectoryFiles` which flattens the stream of generators + /// into a single FileInfoGenerator stream. + /// Makes use of `LocalFileSystemOptions::directory_readahead` to determine how much + /// readahead should happen. + static arrow::Result DiscoverDirectoriesFlattened( + FileSelector selector, LocalFileSystemOptions fs_opts, + const io::IOContext& io_context) { + int32_t dir_readahead = fs_opts.directory_readahead; + ARROW_ASSIGN_OR_RAISE( + auto part_gen, + DiscoverDirectoryFiles(std::move(selector), std::move(fs_opts), io_context)); + return dir_readahead > 1 + ? MakeSequencedMergedGenerator(std::move(part_gen), dir_readahead) + : MakeConcatenatedGenerator(std::move(part_gen)); + } + + private: + /// The class, which implements iterator interface to traverse a given + /// directory at the fixed nesting depth, and possibly recurses into + /// sub-directories (if specified by the selector), spawning more + /// `DiscoveryImplIterators`, which feed their data into a single producer. 
+ class DiscoveryImplIterator { + const PlatformFilename dir_fn_; + const int32_t nesting_depth_; + const FileSelector selector_; + const uint32_t file_info_batch_size_; + + const io::IOContext& io_context_; + std::shared_ptr discovery_state_; + FileInfoVector current_chunk_; + std::vector child_fns_; + size_t idx_ = 0; + bool initialized_ = false; + + public: + DiscoveryImplIterator(PlatformFilename dir_fn, int32_t nesting_depth, + FileSelector selector, + std::shared_ptr discovery_state, + const io::IOContext& io_context, uint32_t file_info_batch_size) + : dir_fn_(std::move(dir_fn)), + nesting_depth_(nesting_depth), + selector_(std::move(selector)), + file_info_batch_size_(file_info_batch_size), + io_context_(io_context), + discovery_state_(std::move(discovery_state)) {} + + /// Pre-initialize the iterator by listing directory contents and caching + /// in the current instance. + Status Initialize() { + auto result = arrow::internal::ListDir(dir_fn_); + if (!result.ok()) { + auto status = result.status(); + if (selector_.allow_not_found && status.IsIOError()) { + ARROW_ASSIGN_OR_RAISE(bool exists, FileExists(dir_fn_)); + if (!exists) { + return Status::OK(); + } + } + return status; + } + child_fns_ = result.MoveValueUnsafe(); + + const size_t dirent_count = child_fns_.size(); + current_chunk_.reserve(dirent_count >= file_info_batch_size_ ? file_info_batch_size_ + : dirent_count); + + initialized_ = true; + return Status::OK(); + } + + Result Next() { + if (!initialized_) { + auto init = Initialize(); + if (!init.ok()) { + return Finish(init); + } + } + while (idx_ < child_fns_.size()) { + auto full_fn = dir_fn_.Join(child_fns_[idx_++]); + auto res = StatFile(full_fn.ToNative()); + if (!res.ok()) { + return Finish(res.status()); + } + + auto info = res.MoveValueUnsafe(); + + // Try to recurse into subdirectories, if needed. + if (info.type() == FileType::Directory && + nesting_depth_ < selector_.max_recursion && selector_.recursive) { + auto status = DoDiscovery(std::move(full_fn), nesting_depth_ + 1, selector_, + discovery_state_, io_context_, file_info_batch_size_); + if (!status.ok()) { + return Finish(status); + } + } + // Everything is ok. Add the item to the current chunk of data. + current_chunk_.emplace_back(std::move(info)); + // Keep `current_chunk_` as large, as `batch_size_`. + // Otherwise, yield the complete chunk to the caller. + if (current_chunk_.size() == file_info_batch_size_) { + FileInfoVector yield_vec = std::move(current_chunk_); + const size_t items_left = child_fns_.size() - idx_; + current_chunk_.reserve( + items_left >= file_info_batch_size_ ? file_info_batch_size_ : items_left); + return yield_vec; + } + } // while (idx_ < child_fns_.size()) + + // Flush out remaining items + if (!current_chunk_.empty()) { + return std::move(current_chunk_); + } + return Finish(); + } + + private: + /// Release reference to shared discovery state and return iteration end + /// marker to indicate that this iterator is exhausted. + Result Finish(Status status = Status::OK()) { + discovery_state_.reset(); + ARROW_RETURN_NOT_OK(status); + return IterationEnd(); + } + }; + + /// Create an instance of `DiscoveryImplIterator` under the hood for the + /// specified directory, wrap it in the `BackgroundGenerator` and feed + /// the results to the main producer queue. + /// + /// Each `DiscoveryImplIterator` maintains a reference to `DiscoveryState`, + /// which simply wraps the producer to keep it alive for the lifetime + /// of this iterator. 
When all references to `DiscoveryState` are invalidated, + /// the producer is closed automatically. + static Status DoDiscovery(const PlatformFilename& dir_fn, int32_t nesting_depth, + FileSelector selector, + std::shared_ptr discovery_state, + const io::IOContext& io_context, + int32_t file_info_batch_size) { + ARROW_RETURN_IF(discovery_state->producer.is_closed(), + arrow::Status::Cancelled("Discovery cancelled")); + + // Note, that here we use `MakeTransferredGenerator()` with the same + // target executor (io executor) as the current iterator is running on. + // + // This is done on purpose, since typically the user of + // `GetFileInfoGenerator()` would want to perform some more IO on the + // produced results (e.g. read the files, examine metadata etc.). + // So, it is preferable to execute the attached continuations on the same + // executor, which belongs to the IO thread pool. + ARROW_ASSIGN_OR_RAISE( + auto gen, + MakeBackgroundGenerator(Iterator(DiscoveryImplIterator( + std::move(dir_fn), nesting_depth, std::move(selector), + discovery_state, io_context, file_info_batch_size)), + io_context.executor())); + gen = MakeTransferredGenerator(std::move(gen), io_context.executor()); + ARROW_RETURN_IF(!discovery_state->producer.Push(std::move(gen)), + arrow::Status::Cancelled("Discovery cancelled")); + return arrow::Status::OK(); + } +}; + +} // anonymous namespace + +FileInfoGenerator LocalFileSystem::GetFileInfoGenerator(const FileSelector& select) { + auto path_status = ValidatePath(select.base_dir); + if (!path_status.ok()) { + return MakeFailingGenerator(path_status); + } + auto fileinfo_gen = + AsyncStatSelector::DiscoverDirectoriesFlattened(select, options(), io_context_); + if (!fileinfo_gen.ok()) { + return MakeFailingGenerator(fileinfo_gen.status()); + } + return fileinfo_gen.MoveValueUnsafe(); +} + Status LocalFileSystem::CreateDir(const std::string& path, bool recursive) { RETURN_NOT_OK(ValidatePath(path)); ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(path)); diff --git a/cpp/src/arrow/filesystem/localfs.h b/cpp/src/arrow/filesystem/localfs.h index 23c8196b0593d..75eaf314e4d79 100644 --- a/cpp/src/arrow/filesystem/localfs.h +++ b/cpp/src/arrow/filesystem/localfs.h @@ -34,10 +34,29 @@ namespace fs { /// Options for the LocalFileSystem implementation. struct ARROW_EXPORT LocalFileSystemOptions { + static constexpr int32_t kDefaultDirectoryReadahead = 16; + static constexpr int32_t kDefaultFileInfoBatchSize = 1000; + /// Whether OpenInputStream and OpenInputFile return a mmap'ed file, /// or a regular one. bool use_mmap = false; + /// Options related to `GetFileInfoGenerator` interface. + + /// EXPERIMENTAL: The maximum number of directories processed in parallel + /// by `GetFileInfoGenerator`. + int32_t directory_readahead = kDefaultDirectoryReadahead; + + /// EXPERIMENTAL: The maximum number of entries aggregated into each + /// FileInfoVector chunk by `GetFileInfoGenerator`. + /// + /// Since each FileInfo entry needs a separate `stat` system call, a + /// directory with a very large number of files may take a lot of time to + /// process entirely. By generating a FileInfoVector after this chunk + /// size is reached, we ensure FileInfo entries can start being consumed + /// from the FileInfoGenerator with less initial latency. 
+ int32_t file_info_batch_size = kDefaultFileInfoBatchSize; + /// \brief Initialize with defaults static LocalFileSystemOptions Defaults(); @@ -73,6 +92,7 @@ class ARROW_EXPORT LocalFileSystem : public FileSystem { /// \endcond Result GetFileInfo(const std::string& path) override; Result> GetFileInfo(const FileSelector& select) override; + FileInfoGenerator GetFileInfoGenerator(const FileSelector& select) override; Status CreateDir(const std::string& path, bool recursive = true) override; diff --git a/cpp/src/arrow/filesystem/localfs_benchmark.cc b/cpp/src/arrow/filesystem/localfs_benchmark.cc new file mode 100644 index 0000000000000..1eb15ccfe23a3 --- /dev/null +++ b/cpp/src/arrow/filesystem/localfs_benchmark.cc @@ -0,0 +1,138 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "benchmark/benchmark.h" + +#include "arrow/filesystem/localfs.h" +#include "arrow/io/file.h" +#include "arrow/status.h" +#include "arrow/table.h" +#include "arrow/testing/future_util.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/testing/random.h" +#include "arrow/util/async_generator.h" +#include "arrow/util/io_util.h" +#include "arrow/util/make_unique.h" +#include "arrow/util/string_view.h" + +namespace arrow { + +namespace fs { + +using arrow::internal::make_unique; +using arrow::internal::TemporaryDir; + +/// Set up hierarchical directory structure to test asynchronous +/// file discovery interface (GetFileInfoGenerator()) in the LocalFileSystem +/// class. +/// +/// The main routine of the class is `InitializeDatasetStructure()`, which +/// does the following: +/// 1. Create `num_files_` empty files under specified root directory. +/// 2. Create `num_dirs_` additional sub-directories in the current dir. +/// 3. Check if the specified recursion limit is reached (controlled by `nesting_depth_`). +/// a. Return if recursion limit reached. +/// b. Recurse into each sub-directory and perform steps above, increasing current +/// nesting level. 
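Aside (not part of the patch): before the benchmark fixture below, here is a minimal consumer-side sketch of `GetFileInfoGenerator` with the two new tuning knobs. The root directory, readahead, and batch-size values are illustrative, and error handling is reduced to returning the first failing Status; the benchmark that follows exercises the same path with generated directory trees.

// Sketch only: recursively discover files with the async generator API,
// tuning directory readahead and per-chunk batch size.
#include <cstdint>
#include <string>

#include "arrow/filesystem/filesystem.h"
#include "arrow/filesystem/localfs.h"
#include "arrow/status.h"
#include "arrow/util/async_generator.h"

arrow::Status CountFilesAsync(const std::string& root_dir, int64_t* out_count) {
  auto options = arrow::fs::LocalFileSystemOptions::Defaults();
  options.directory_readahead = 8;     // process up to 8 directories in parallel
  options.file_info_batch_size = 256;  // yield FileInfoVector chunks of <= 256 entries
  arrow::fs::LocalFileSystem fs(options);

  arrow::fs::FileSelector selector;
  selector.base_dir = root_dir;
  selector.recursive = true;

  auto gen = fs.GetFileInfoGenerator(selector);
  int64_t count = 0;
  // Visit each FileInfoVector chunk as it is produced by the generator.
  auto fut = arrow::VisitAsyncGenerator(
      std::move(gen), [&count](const arrow::fs::FileInfoVector& batch) {
        count += static_cast<int64_t>(batch.size());
        return arrow::Status::OK();
      });
  ARROW_RETURN_NOT_OK(fut.status());  // block until discovery finishes
  *out_count = count;
  return arrow::Status::OK();
}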
+class LocalFSFixture : public benchmark::Fixture { + public: + void SetUp(const benchmark::State& state) override { + ASSERT_OK_AND_ASSIGN(tmp_dir_, TemporaryDir::Make("localfs-test-")); + + auto options = LocalFileSystemOptions::Defaults(); + fs_ = make_unique(options); + + InitializeDatasetStructure(0, tmp_dir_->path()); + } + + void InitializeDatasetStructure(size_t cur_nesting_level, + arrow::internal::PlatformFilename cur_root_dir) { + ASSERT_OK(arrow::internal::CreateDir(cur_root_dir)); + + for (size_t i = 0; i < num_files_; ++i) { + ASSERT_OK_AND_ASSIGN(auto path, + cur_root_dir.Join(std::string{"file_" + std::to_string(i)})); + ASSERT_OK(MakeEmptyFile(path.ToString())); + } + + if (cur_nesting_level == nesting_depth_) { + return; + } + + for (size_t i = 0; i < num_dirs_; ++i) { + ASSERT_OK_AND_ASSIGN(auto path, + cur_root_dir.Join(std::string{"dir_" + std::to_string(i)})); + InitializeDatasetStructure(cur_nesting_level + 1, std::move(path)); + } + } + + Status MakeEmptyFile(const std::string& path) { + return io::FileOutputStream::Open(path).status(); + } + + protected: + std::unique_ptr tmp_dir_; + std::unique_ptr fs_; + + const size_t nesting_depth_ = 2; + const size_t num_dirs_ = 10; + const size_t num_files_ = 1000; +}; + +/// Benchmark for `LocalFileSystem::GetFileInfoGenerator()` performance. +/// +/// The test function is executed for each combination (cartesian product) +/// of input arguments tuple (directory_readahead, file_info_batch_size) +/// to test both internal parallelism and batching. +BENCHMARK_DEFINE_F(LocalFSFixture, AsyncFileDiscovery) +(benchmark::State& st) { + size_t total_file_count = 0; + + for (auto _ : st) { + // Instantiate LocalFileSystem with custom options for directory readahead + // and file info batch size. + auto options = LocalFileSystemOptions::Defaults(); + options.directory_readahead = static_cast(st.range(0)); + options.file_info_batch_size = static_cast(st.range(1)); + auto test_fs = make_unique(options); + // Create recursive FileSelector pointing to the root of the temporary + // directory, which was set up by the fixture earlier. + FileSelector select; + select.base_dir = tmp_dir_->path().ToString(); + select.recursive = true; + auto file_gen = test_fs->GetFileInfoGenerator(std::move(select)); + // Trigger fetching from the generator and count all received FileInfo:s. 
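+    // VisitAsyncGenerator keeps pulling FileInfoVector batches until the
+    // generator is exhausted; each batch is bounded by the
+    // file_info_batch_size value configured above.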
+ auto visit_fut = + VisitAsyncGenerator(file_gen, [&total_file_count](const FileInfoVector& fv) { + total_file_count += fv.size(); + return Status::OK(); + }); + ASSERT_FINISHES_OK(visit_fut); + } + st.SetItemsProcessed(total_file_count); +} +BENCHMARK_REGISTER_F(LocalFSFixture, AsyncFileDiscovery) + ->ArgNames({"directory_readahead", "file_info_batch_size"}) + ->ArgsProduct({{1, 4, 16}, {100, 1000}}) + ->UseRealTime() + ->Unit(benchmark::kMillisecond); + +} // namespace fs + +} // namespace arrow diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index 748c832ddd44a..0078a59393808 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -163,8 +164,12 @@ class TestLocalFS : public LocalFSTestMixin { void SetUp() { LocalFSTestMixin::SetUp(); path_formatter_ = PathFormatter(); - local_fs_ = std::make_shared(); local_path_ = EnsureTrailingSlash(path_formatter_(temp_dir_->path().ToString())); + MakeFileSystem(); + } + + void MakeFileSystem() { + local_fs_ = std::make_shared(options_); fs_ = std::make_shared(local_path_, local_fs_); } @@ -248,6 +253,7 @@ class TestLocalFS : public LocalFSTestMixin { protected: PathFormatter path_formatter_; + LocalFileSystemOptions options_ = LocalFileSystemOptions::Defaults(); std::shared_ptr local_fs_; std::shared_ptr fs_; std::string local_path_; @@ -398,6 +404,78 @@ TYPED_TEST(TestLocalFS, FileMTime) { AssertDurationBetween(t2 - infos[1].mtime(), -kTimeSlack, kTimeSlack); } +struct DirTreeCreator { + static constexpr int kFilesPerDir = 50; + static constexpr int kDirLevels = 2; + static constexpr int kSubdirsPerDir = 8; + + FileSystem* fs_; + + Result Create(const std::string& base) { + FileInfoVector infos; + RETURN_NOT_OK(Create(base, 0, &infos)); + return std::move(infos); + } + + Status Create(const std::string& base, int depth, FileInfoVector* infos) { + for (int i = 0; i < kFilesPerDir; ++i) { + std::stringstream ss; + ss << "f" << i; + auto path = ConcatAbstractPath(base, ss.str()); + const int data_size = i % 5; + std::string data(data_size, 'x'); + CreateFile(fs_, path, data); + FileInfo info(std::move(path), FileType::File); + info.set_size(data_size); + infos->push_back(std::move(info)); + } + if (depth < kDirLevels) { + for (int i = 0; i < kSubdirsPerDir; ++i) { + std::stringstream ss; + ss << "d" << i; + auto path = ConcatAbstractPath(base, ss.str()); + RETURN_NOT_OK(fs_->CreateDir(path)); + infos->push_back(FileInfo(path, FileType::Directory)); + RETURN_NOT_OK(Create(path, depth + 1, infos)); + } + } + return Status::OK(); + } +}; + +TYPED_TEST(TestLocalFS, StressGetFileInfoGenerator) { + // Stress GetFileInfoGenerator with large numbers of entries + DirTreeCreator dir_tree_creator{this->local_fs_.get()}; + ASSERT_OK_AND_ASSIGN(FileInfoVector expected, + dir_tree_creator.Create(this->local_path_)); + SortInfos(&expected); + + for (int32_t directory_readahead : {1, 5}) { + for (int32_t file_info_batch_size : {3, 1000}) { + ARROW_SCOPED_TRACE("directory_readahead = ", directory_readahead, + ", file_info_batch_size = ", file_info_batch_size); + this->options_.directory_readahead = directory_readahead; + this->options_.file_info_batch_size = file_info_batch_size; + this->MakeFileSystem(); + + FileSelector selector; + selector.base_dir = this->local_path_; + selector.recursive = true; + + auto gen = this->local_fs_->GetFileInfoGenerator(selector); + FileInfoVector actual; + 
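+      // Drain the generator into a flat vector so the listing can be sorted
+      // and compared against the expected entries below.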
CollectFileInfoGenerator(gen, &actual); + ASSERT_EQ(actual.size(), expected.size()); + SortInfos(&actual); + + for (int64_t i = 0; i < static_cast(actual.size()); ++i) { + AssertFileInfo(actual[i], expected[i].path(), expected[i].type(), + expected[i].size()); + } + } + } +} + // TODO Should we test backslash paths on Windows? // SubTreeFileSystem isn't compatible with them. diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index dd3973ba7717e..fb933e4d4d561 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -209,6 +209,54 @@ bool S3ProxyOptions::Equals(const S3ProxyOptions& other) const { username == other.username && password == other.password); } +// ----------------------------------------------------------------------- +// AwsRetryStrategy implementation + +class AwsRetryStrategy : public S3RetryStrategy { + public: + explicit AwsRetryStrategy(std::shared_ptr retry_strategy) + : retry_strategy_(std::move(retry_strategy)) {} + + bool ShouldRetry(const AWSErrorDetail& detail, int64_t attempted_retries) override { + Aws::Client::AWSError error = DetailToError(detail); + return retry_strategy_->ShouldRetry( + error, static_cast(attempted_retries)); // NOLINT: runtime/int + } + + int64_t CalculateDelayBeforeNextRetry(const AWSErrorDetail& detail, + int64_t attempted_retries) override { + Aws::Client::AWSError error = DetailToError(detail); + return retry_strategy_->CalculateDelayBeforeNextRetry( + error, static_cast(attempted_retries)); // NOLINT: runtime/int + } + + private: + std::shared_ptr retry_strategy_; + static Aws::Client::AWSError DetailToError( + const S3RetryStrategy::AWSErrorDetail& detail) { + auto exception_name = ToAwsString(detail.exception_name); + auto message = ToAwsString(detail.message); + auto errors = Aws::Client::AWSError( + static_cast(detail.error_type), exception_name, message, + detail.should_retry); + return errors; + } +}; + +std::shared_ptr S3RetryStrategy::GetAwsDefaultRetryStrategy( + int64_t max_attempts) { + return std::make_shared( + std::make_shared( + static_cast(max_attempts))); // NOLINT: runtime/int +} + +std::shared_ptr S3RetryStrategy::GetAwsStandardRetryStrategy( + int64_t max_attempts) { + return std::make_shared( + std::make_shared( + static_cast(max_attempts))); // NOLINT: runtime/int +} + // ----------------------------------------------------------------------- // S3Options implementation @@ -386,10 +434,11 @@ bool S3Options::Equals(const S3Options& other) const { default_metadata_size ? 
(other.default_metadata && other.default_metadata->Equals(*default_metadata)) : (!other.default_metadata || other.default_metadata->size() == 0); - return (region == other.region && endpoint_override == other.endpoint_override && - scheme == other.scheme && role_arn == other.role_arn && - session_name == other.session_name && external_id == other.external_id && - load_frequency == other.load_frequency && + return (region == other.region && connect_timeout == other.connect_timeout && + request_timeout == other.request_timeout && + endpoint_override == other.endpoint_override && scheme == other.scheme && + role_arn == other.role_arn && session_name == other.session_name && + external_id == other.external_id && load_frequency == other.load_frequency && proxy_options.Equals(other.proxy_options) && credentials_kind == other.credentials_kind && background_writes == other.background_writes && @@ -718,6 +767,16 @@ class ClientBuilder { if (!options_.region.empty()) { client_config_.region = ToAwsString(options_.region); } + if (options_.request_timeout > 0) { + // Use ceil() to avoid setting it to 0 as that probably means no timeout. + client_config_.requestTimeoutMs = + static_cast(ceil(options_.request_timeout * 1000)); // NOLINT runtime/int + } + if (options_.connect_timeout > 0) { + client_config_.connectTimeoutMs = + static_cast(ceil(options_.connect_timeout * 1000)); // NOLINT runtime/int + } + client_config_.endpointOverride = ToAwsString(options_.endpoint_override); if (options_.scheme == "http") { client_config_.scheme = Aws::Http::Scheme::HTTP; diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h index 05fa404162aba..3b4731883b4ef 100644 --- a/cpp/src/arrow/filesystem/s3fs.h +++ b/cpp/src/arrow/filesystem/s3fs.h @@ -70,7 +70,7 @@ enum class S3CredentialsKind : int8_t { }; /// Pure virtual class for describing custom S3 retry strategies -class S3RetryStrategy { +class ARROW_EXPORT S3RetryStrategy { public: virtual ~S3RetryStrategy() = default; @@ -90,6 +90,12 @@ class S3RetryStrategy { /// Returns the time in milliseconds the S3 client should sleep for until retrying. virtual int64_t CalculateDelayBeforeNextRetry(const AWSErrorDetail& error, int64_t attempted_retries) = 0; + /// Returns a stock AWS Default retry strategy. + static std::shared_ptr GetAwsDefaultRetryStrategy( + int64_t max_attempts); + /// Returns a stock AWS Standard retry strategy. + static std::shared_ptr GetAwsStandardRetryStrategy( + int64_t max_attempts); }; /// Options for the S3FileSystem implementation. @@ -103,6 +109,17 @@ struct ARROW_EXPORT S3Options { /// server). std::string region; + /// \brief Socket connection timeout, in seconds + /// + /// If negative, the AWS SDK default value is used (typically 1 second). + double connect_timeout = -1; + + /// \brief Socket read timeout on Windows and macOS, in seconds + /// + /// If negative, the AWS SDK default value is used (typically 3 seconds). + /// This option is ignored on non-Windows, non-macOS systems. + double request_timeout = -1; + /// If non-empty, override region with a connect string such as "localhost:9000" // XXX perhaps instead take a URL like "http://localhost:9000"? 
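An illustrative configuration combining the new timeout fields with the stock
retry-strategy factories added above (a sketch only: it assumes S3 has already
been initialized via InitializeS3() and that S3Options keeps its existing
retry_strategy member, neither of which is shown in this diff):

  auto s3_options = arrow::fs::S3Options::Defaults();
  s3_options.connect_timeout = 5.0;   // seconds; negative keeps the SDK default
  s3_options.request_timeout = 30.0;  // seconds; only honored on Windows and macOS
  s3_options.retry_strategy =
      arrow::fs::S3RetryStrategy::GetAwsStandardRetryStrategy(/*max_attempts=*/5);
  ARROW_ASSIGN_OR_RAISE(auto s3fs, arrow::fs::S3FileSystem::Make(s3_options));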
std::string endpoint_override; diff --git a/cpp/src/arrow/filesystem/type_fwd.h b/cpp/src/arrow/filesystem/type_fwd.h index c6427dc3c8643..892f7ad2e1b16 100644 --- a/cpp/src/arrow/filesystem/type_fwd.h +++ b/cpp/src/arrow/filesystem/type_fwd.h @@ -17,6 +17,8 @@ #pragma once +#include + namespace arrow { namespace fs { diff --git a/cpp/src/arrow/flight/integration_tests/CMakeLists.txt b/cpp/src/arrow/flight/integration_tests/CMakeLists.txt index 66a021b4b5975..1bbd923160642 100644 --- a/cpp/src/arrow/flight/integration_tests/CMakeLists.txt +++ b/cpp/src/arrow/flight/integration_tests/CMakeLists.txt @@ -40,3 +40,14 @@ target_link_libraries(flight-test-integration-client add_dependencies(arrow-integration flight-test-integration-client flight-test-integration-server) + +if(ARROW_BUILD_TESTS) + add_arrow_test(flight_integration_test + SOURCES + flight_integration_test.cc + test_integration.cc + STATIC_LINK_LIBS + ${ARROW_FLIGHT_INTEGRATION_TEST_LINK_LIBS} + LABELS + "arrow_flight") +endif() diff --git a/cpp/src/arrow/flight/integration_tests/flight_integration_test.cc b/cpp/src/arrow/flight/integration_tests/flight_integration_test.cc new file mode 100644 index 0000000000000..706ac3b7d931b --- /dev/null +++ b/cpp/src/arrow/flight/integration_tests/flight_integration_test.cc @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Run the integration test scenarios in-process. 
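+//
+// Each scenario runs end to end in a single process: the scenario's server is
+// bound to port 0 (so the OS assigns a free port), the matching client is then
+// connected to server->port(), and the scenario's client-side checks are run
+// against it.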
+ +#include +#include + +#include "arrow/flight/integration_tests/test_integration.h" +#include "arrow/status.h" +#include "arrow/testing/gtest_util.h" + +namespace arrow { +namespace flight { +namespace integration_tests { + +Status RunScenario(const std::string& scenario_name) { + std::shared_ptr scenario; + ARROW_RETURN_NOT_OK(GetScenario(scenario_name, &scenario)); + + std::unique_ptr server; + ARROW_ASSIGN_OR_RAISE(Location bind_location, + arrow::flight::Location::ForGrpcTcp("0.0.0.0", 0)); + FlightServerOptions server_options(bind_location); + ARROW_RETURN_NOT_OK(scenario->MakeServer(&server, &server_options)); + ARROW_RETURN_NOT_OK(server->Init(server_options)); + + ARROW_ASSIGN_OR_RAISE(Location location, + arrow::flight::Location::ForGrpcTcp("0.0.0.0", server->port())); + auto client_options = arrow::flight::FlightClientOptions::Defaults(); + ARROW_RETURN_NOT_OK(scenario->MakeClient(&client_options)); + ARROW_ASSIGN_OR_RAISE(std::unique_ptr client, + FlightClient::Connect(location, client_options)); + ARROW_RETURN_NOT_OK(scenario->RunClient(std::move(client))); + return Status::OK(); +} + +TEST(FlightIntegration, AuthBasicProto) { ASSERT_OK(RunScenario("auth:basic_proto")); } + +TEST(FlightIntegration, Middleware) { ASSERT_OK(RunScenario("middleware")); } + +TEST(FlightIntegration, FlightSql) { ASSERT_OK(RunScenario("flight_sql")); } + +} // namespace integration_tests +} // namespace flight +} // namespace arrow diff --git a/cpp/src/arrow/flight/integration_tests/test_integration.cc b/cpp/src/arrow/flight/integration_tests/test_integration.cc index 2e5609b6c0e50..b228f9cceba06 100644 --- a/cpp/src/arrow/flight/integration_tests/test_integration.cc +++ b/cpp/src/arrow/flight/integration_tests/test_integration.cc @@ -117,7 +117,7 @@ class AuthBasicProtoScenario : public Scenario { /// regardless of what gRPC does. class TestServerMiddleware : public ServerMiddleware { public: - explicit TestServerMiddleware(std::string received) : received_(received) {} + explicit TestServerMiddleware(std::string received) : received_(std::move(received)) {} void SendingHeaders(AddCallHeaders* outgoing_headers) override { outgoing_headers->AddHeader("x-middleware", received_); @@ -154,11 +154,11 @@ class TestClientMiddleware : public ClientMiddleware { explicit TestClientMiddleware(std::string* received_header) : received_header_(received_header) {} - void SendingHeaders(AddCallHeaders* outgoing_headers) { + void SendingHeaders(AddCallHeaders* outgoing_headers) override { outgoing_headers->AddHeader("x-middleware", "expected value"); } - void ReceivedHeaders(const CallHeaders& incoming_headers) { + void ReceivedHeaders(const CallHeaders& incoming_headers) override { // We expect the server to always send this header. gRPC/Java may // send it in trailers instead of headers, so we expect Flight to // account for this. 
@@ -170,7 +170,7 @@ class TestClientMiddleware : public ClientMiddleware { } } - void CallCompleted(const Status& status) {} + void CallCompleted(const Status& status) override {} private: std::string* received_header_; @@ -178,7 +178,8 @@ class TestClientMiddleware : public ClientMiddleware { class TestClientMiddlewareFactory : public ClientMiddlewareFactory { public: - void StartCall(const CallInfo& info, std::unique_ptr* middleware) { + void StartCall(const CallInfo& info, + std::unique_ptr* middleware) override { *middleware = std::unique_ptr(new TestClientMiddleware(&received_header_)); } @@ -218,8 +219,8 @@ class MiddlewareServer : public FlightServerBase { class MiddlewareScenario : public Scenario { Status MakeServer(std::unique_ptr* server, FlightServerOptions* options) override { - options->middleware.push_back( - {"grpc_trailers", std::make_shared()}); + options->middleware.emplace_back("grpc_trailers", + std::make_shared()); server->reset(new MiddlewareServer()); return Status::OK(); } @@ -284,11 +285,13 @@ std::shared_ptr GetQuerySchema() { constexpr int64_t kUpdateStatementExpectedRows = 10000L; constexpr int64_t kUpdatePreparedStatementExpectedRows = 20000L; +constexpr char kSelectStatement[] = "SELECT STATEMENT"; template -arrow::Status AssertEq(const T& expected, const T& actual) { +arrow::Status AssertEq(const T& expected, const T& actual, const std::string& message) { if (expected != actual) { - return Status::Invalid("Expected \"", expected, "\", got \'", actual, "\""); + return Status::Invalid(message, ": expected \"", expected, "\", got \"", actual, + "\""); } return Status::OK(); } @@ -301,7 +304,9 @@ class FlightSqlScenarioServer : public sql::FlightSqlServerBase { arrow::Result> GetFlightInfoStatement( const ServerCallContext& context, const sql::StatementQuery& command, const FlightDescriptor& descriptor) override { - ARROW_RETURN_NOT_OK(AssertEq("SELECT STATEMENT", command.query)); + ARROW_RETURN_NOT_OK( + AssertEq(kSelectStatement, command.query, + "Unexpected statement in GetFlightInfoStatement")); ARROW_ASSIGN_OR_RAISE(auto handle, sql::CreateStatementQueryTicket("SELECT STATEMENT HANDLE")); @@ -313,6 +318,14 @@ class FlightSqlScenarioServer : public sql::FlightSqlServerBase { return std::unique_ptr(new FlightInfo(result)); } + arrow::Result> GetSchemaStatement( + const ServerCallContext& context, const sql::StatementQuery& command, + const FlightDescriptor& descriptor) override { + ARROW_RETURN_NOT_OK(AssertEq( + kSelectStatement, command.query, "Unexpected statement in GetSchemaStatement")); + return SchemaResult::Make(*GetQuerySchema()); + } + arrow::Result> DoGetStatement( const ServerCallContext& context, const sql::StatementQueryTicket& command) override { @@ -323,11 +336,21 @@ class FlightSqlScenarioServer : public sql::FlightSqlServerBase { const ServerCallContext& context, const sql::PreparedStatementQuery& command, const FlightDescriptor& descriptor) override { ARROW_RETURN_NOT_OK(AssertEq("SELECT PREPARED STATEMENT HANDLE", - command.prepared_statement_handle)); + command.prepared_statement_handle, + "Unexpected prepared statement handle")); return GetFlightInfoForCommand(descriptor, GetQuerySchema()); } + arrow::Result> GetSchemaPreparedStatement( + const ServerCallContext& context, const sql::PreparedStatementQuery& command, + const FlightDescriptor& descriptor) override { + ARROW_RETURN_NOT_OK(AssertEq("SELECT PREPARED STATEMENT HANDLE", + command.prepared_statement_handle, + "Unexpected prepared statement handle")); + return 
SchemaResult::Make(*GetQuerySchema()); + } + arrow::Result> DoGetPreparedStatement( const ServerCallContext& context, const sql::PreparedStatementQuery& command) override { @@ -358,11 +381,14 @@ class FlightSqlScenarioServer : public sql::FlightSqlServerBase { arrow::Result> GetFlightInfoSqlInfo( const ServerCallContext& context, const sql::GetSqlInfo& command, const FlightDescriptor& descriptor) override { - ARROW_RETURN_NOT_OK(AssertEq(2, command.info.size())); - ARROW_RETURN_NOT_OK(AssertEq( - sql::SqlInfoOptions::SqlInfo::FLIGHT_SQL_SERVER_NAME, command.info[0])); - ARROW_RETURN_NOT_OK(AssertEq( - sql::SqlInfoOptions::SqlInfo::FLIGHT_SQL_SERVER_READ_ONLY, command.info[1])); + ARROW_RETURN_NOT_OK(AssertEq(2, command.info.size(), + "Wrong number of SqlInfo values passed")); + ARROW_RETURN_NOT_OK( + AssertEq(sql::SqlInfoOptions::SqlInfo::FLIGHT_SQL_SERVER_NAME, + command.info[0], "Unexpected SqlInfo passed")); + ARROW_RETURN_NOT_OK( + AssertEq(sql::SqlInfoOptions::SqlInfo::FLIGHT_SQL_SERVER_READ_ONLY, + command.info[1], "Unexpected SqlInfo passed")); return GetFlightInfoForCommand(descriptor, sql::SqlSchema::GetSqlInfoSchema()); } @@ -375,9 +401,11 @@ class FlightSqlScenarioServer : public sql::FlightSqlServerBase { arrow::Result> GetFlightInfoSchemas( const ServerCallContext& context, const sql::GetDbSchemas& command, const FlightDescriptor& descriptor) override { - ARROW_RETURN_NOT_OK(AssertEq("catalog", command.catalog.value())); + ARROW_RETURN_NOT_OK(AssertEq("catalog", command.catalog.value(), + "Wrong catalog passed")); ARROW_RETURN_NOT_OK(AssertEq("db_schema_filter_pattern", - command.db_schema_filter_pattern.value())); + command.db_schema_filter_pattern.value(), + "Wrong db_schema_filter_pattern passed")); return GetFlightInfoForCommand(descriptor, sql::SqlSchema::GetDbSchemasSchema()); } @@ -390,15 +418,22 @@ class FlightSqlScenarioServer : public sql::FlightSqlServerBase { arrow::Result> GetFlightInfoTables( const ServerCallContext& context, const sql::GetTables& command, const FlightDescriptor& descriptor) override { - ARROW_RETURN_NOT_OK(AssertEq("catalog", command.catalog.value())); + ARROW_RETURN_NOT_OK(AssertEq("catalog", command.catalog.value(), + "Wrong catalog passed")); ARROW_RETURN_NOT_OK(AssertEq("db_schema_filter_pattern", - command.db_schema_filter_pattern.value())); + command.db_schema_filter_pattern.value(), + "Wrong db_schema_filter_pattern passed")); ARROW_RETURN_NOT_OK(AssertEq("table_filter_pattern", - command.table_name_filter_pattern.value())); - ARROW_RETURN_NOT_OK(AssertEq(2, command.table_types.size())); - ARROW_RETURN_NOT_OK(AssertEq("table", command.table_types[0])); - ARROW_RETURN_NOT_OK(AssertEq("view", command.table_types[1])); - ARROW_RETURN_NOT_OK(AssertEq(true, command.include_schema)); + command.table_name_filter_pattern.value(), + "Wrong table_filter_pattern passed")); + ARROW_RETURN_NOT_OK(AssertEq(2, command.table_types.size(), + "Wrong number of table types passed")); + ARROW_RETURN_NOT_OK(AssertEq("table", command.table_types[0], + "Wrong table type passed")); + ARROW_RETURN_NOT_OK( + AssertEq("view", command.table_types[1], "Wrong table type passed")); + ARROW_RETURN_NOT_OK( + AssertEq(true, command.include_schema, "include_schema should be true")); return GetFlightInfoForCommand(descriptor, sql::SqlSchema::GetTablesSchemaWithIncludedSchema()); @@ -422,11 +457,12 @@ class FlightSqlScenarioServer : public sql::FlightSqlServerBase { arrow::Result> GetFlightInfoPrimaryKeys( const ServerCallContext& context, const sql::GetPrimaryKeys& command, 
const FlightDescriptor& descriptor) override { + ARROW_RETURN_NOT_OK(AssertEq( + "catalog", command.table_ref.catalog.value(), "Wrong catalog passed")); + ARROW_RETURN_NOT_OK(AssertEq( + "db_schema", command.table_ref.db_schema.value(), "Wrong db_schema passed")); ARROW_RETURN_NOT_OK( - AssertEq("catalog", command.table_ref.catalog.value())); - ARROW_RETURN_NOT_OK( - AssertEq("db_schema", command.table_ref.db_schema.value())); - ARROW_RETURN_NOT_OK(AssertEq("table", command.table_ref.table)); + AssertEq("table", command.table_ref.table, "Wrong table passed")); return GetFlightInfoForCommand(descriptor, sql::SqlSchema::GetPrimaryKeysSchema()); } @@ -439,11 +475,12 @@ class FlightSqlScenarioServer : public sql::FlightSqlServerBase { arrow::Result> GetFlightInfoExportedKeys( const ServerCallContext& context, const sql::GetExportedKeys& command, const FlightDescriptor& descriptor) override { + ARROW_RETURN_NOT_OK(AssertEq( + "catalog", command.table_ref.catalog.value(), "Wrong catalog passed")); + ARROW_RETURN_NOT_OK(AssertEq( + "db_schema", command.table_ref.db_schema.value(), "Wrong db_schema passed")); ARROW_RETURN_NOT_OK( - AssertEq("catalog", command.table_ref.catalog.value())); - ARROW_RETURN_NOT_OK( - AssertEq("db_schema", command.table_ref.db_schema.value())); - ARROW_RETURN_NOT_OK(AssertEq("table", command.table_ref.table)); + AssertEq("table", command.table_ref.table, "Wrong table passed")); return GetFlightInfoForCommand(descriptor, sql::SqlSchema::GetExportedKeysSchema()); } @@ -456,11 +493,12 @@ class FlightSqlScenarioServer : public sql::FlightSqlServerBase { arrow::Result> GetFlightInfoImportedKeys( const ServerCallContext& context, const sql::GetImportedKeys& command, const FlightDescriptor& descriptor) override { + ARROW_RETURN_NOT_OK(AssertEq( + "catalog", command.table_ref.catalog.value(), "Wrong catalog passed")); + ARROW_RETURN_NOT_OK(AssertEq( + "db_schema", command.table_ref.db_schema.value(), "Wrong db_schema passed")); ARROW_RETURN_NOT_OK( - AssertEq("catalog", command.table_ref.catalog.value())); - ARROW_RETURN_NOT_OK( - AssertEq("db_schema", command.table_ref.db_schema.value())); - ARROW_RETURN_NOT_OK(AssertEq("table", command.table_ref.table)); + AssertEq("table", command.table_ref.table, "Wrong table passed")); return GetFlightInfoForCommand(descriptor, sql::SqlSchema::GetImportedKeysSchema()); } @@ -473,16 +511,20 @@ class FlightSqlScenarioServer : public sql::FlightSqlServerBase { arrow::Result> GetFlightInfoCrossReference( const ServerCallContext& context, const sql::GetCrossReference& command, const FlightDescriptor& descriptor) override { - ARROW_RETURN_NOT_OK( - AssertEq("pk_catalog", command.pk_table_ref.catalog.value())); - ARROW_RETURN_NOT_OK( - AssertEq("pk_db_schema", command.pk_table_ref.db_schema.value())); - ARROW_RETURN_NOT_OK(AssertEq("pk_table", command.pk_table_ref.table)); - ARROW_RETURN_NOT_OK( - AssertEq("fk_catalog", command.fk_table_ref.catalog.value())); - ARROW_RETURN_NOT_OK( - AssertEq("fk_db_schema", command.fk_table_ref.db_schema.value())); - ARROW_RETURN_NOT_OK(AssertEq("fk_table", command.fk_table_ref.table)); + ARROW_RETURN_NOT_OK(AssertEq( + "pk_catalog", command.pk_table_ref.catalog.value(), "Wrong pk catalog passed")); + ARROW_RETURN_NOT_OK(AssertEq("pk_db_schema", + command.pk_table_ref.db_schema.value(), + "Wrong pk db_schema passed")); + ARROW_RETURN_NOT_OK(AssertEq("pk_table", command.pk_table_ref.table, + "Wrong pk table passed")); + ARROW_RETURN_NOT_OK(AssertEq( + "fk_catalog", command.fk_table_ref.catalog.value(), "Wrong fk 
catalog passed")); + ARROW_RETURN_NOT_OK(AssertEq("fk_db_schema", + command.fk_table_ref.db_schema.value(), + "Wrong fk db_schema passed")); + ARROW_RETURN_NOT_OK(AssertEq("fk_table", command.fk_table_ref.table, + "Wrong fk table passed")); return GetFlightInfoForCommand(descriptor, sql::SqlSchema::GetTableTypesSchema()); } @@ -494,7 +536,9 @@ class FlightSqlScenarioServer : public sql::FlightSqlServerBase { arrow::Result DoPutCommandStatementUpdate( const ServerCallContext& context, const sql::StatementUpdate& command) override { - ARROW_RETURN_NOT_OK(AssertEq("UPDATE STATEMENT", command.query)); + ARROW_RETURN_NOT_OK( + AssertEq("UPDATE STATEMENT", command.query, + "Wrong query for DoPutCommandStatementUpdate")); return kUpdateStatementExpectedRows; } @@ -502,9 +546,10 @@ class FlightSqlScenarioServer : public sql::FlightSqlServerBase { arrow::Result CreatePreparedStatement( const ServerCallContext& context, const sql::ActionCreatePreparedStatementRequest& request) override { - ARROW_RETURN_NOT_OK( - AssertEq(true, request.query == "SELECT PREPARED STATEMENT" || - request.query == "UPDATE PREPARED STATEMENT")); + if (request.query != "SELECT PREPARED STATEMENT" && + request.query != "UPDATE PREPARED STATEMENT") { + return Status::Invalid("Unexpected query: ", request.query); + } sql::ActionCreatePreparedStatementResult result; result.prepared_statement_handle = request.query + " HANDLE"; @@ -515,6 +560,11 @@ class FlightSqlScenarioServer : public sql::FlightSqlServerBase { Status ClosePreparedStatement( const ServerCallContext& context, const sql::ActionClosePreparedStatementRequest& request) override { + if (request.prepared_statement_handle != "SELECT PREPARED STATEMENT HANDLE" && + request.prepared_statement_handle != "UPDATE PREPARED STATEMENT HANDLE") { + return Status::Invalid("Invalid handle for ClosePreparedStatement: ", + request.prepared_statement_handle); + } return Status::OK(); } @@ -522,11 +572,14 @@ class FlightSqlScenarioServer : public sql::FlightSqlServerBase { const sql::PreparedStatementQuery& command, FlightMessageReader* reader, FlightMetadataWriter* writer) override { - ARROW_RETURN_NOT_OK(AssertEq("SELECT PREPARED STATEMENT HANDLE", - command.prepared_statement_handle)); + if (command.prepared_statement_handle != "SELECT PREPARED STATEMENT HANDLE") { + return Status::Invalid("Invalid handle for DoPutPreparedStatementQuery: ", + command.prepared_statement_handle); + } ARROW_ASSIGN_OR_RAISE(auto actual_schema, reader->GetSchema()); - ARROW_RETURN_NOT_OK(AssertEq(*GetQuerySchema(), *actual_schema)); + ARROW_RETURN_NOT_OK(AssertEq(*GetQuerySchema(), *actual_schema, + "Wrong schema for DoPutPreparedStatementQuery")); return Status::OK(); } @@ -534,10 +587,11 @@ class FlightSqlScenarioServer : public sql::FlightSqlServerBase { arrow::Result DoPutPreparedStatementUpdate( const ServerCallContext& context, const sql::PreparedStatementUpdate& command, FlightMessageReader* reader) override { - ARROW_RETURN_NOT_OK(AssertEq("UPDATE PREPARED STATEMENT HANDLE", - command.prepared_statement_handle)); - - return kUpdatePreparedStatementExpectedRows; + if (command.prepared_statement_handle == "UPDATE PREPARED STATEMENT HANDLE") { + return kUpdatePreparedStatementExpectedRows; + } + return Status::Invalid("Invalid handle for DoPutPreparedStatementUpdate: ", + command.prepared_statement_handle); } private: @@ -569,31 +623,37 @@ class FlightSqlScenario : public Scenario { Status MakeClient(FlightClientOptions* options) override { return Status::OK(); } - Status 
Validate(std::shared_ptr expected_schema, - arrow::Result> flight_info_result, - sql::FlightSqlClient* sql_client) { + Status Validate(const std::shared_ptr& expected_schema, + const FlightInfo& flight_info, sql::FlightSqlClient* sql_client) { FlightCallOptions call_options; - - ARROW_ASSIGN_OR_RAISE(auto flight_info, flight_info_result); ARROW_ASSIGN_OR_RAISE( - auto reader, sql_client->DoGet(call_options, flight_info->endpoints()[0].ticket)); - + std::unique_ptr reader, + sql_client->DoGet(call_options, flight_info.endpoints()[0].ticket)); ARROW_ASSIGN_OR_RAISE(auto actual_schema, reader->GetSchema()); + if (!expected_schema->Equals(*actual_schema, /*check_metadata=*/true)) { + return Status::Invalid("Schemas did not match. Expected:\n", *expected_schema, + "\nActual:\n", *actual_schema); + } + ARROW_RETURN_NOT_OK(reader->ToTable()); + return Status::OK(); + } - AssertSchemaEqual(expected_schema, actual_schema); - + Status ValidateSchema(const std::shared_ptr& expected_schema, + const SchemaResult& result) { + ipc::DictionaryMemo memo; + ARROW_ASSIGN_OR_RAISE(auto actual_schema, result.GetSchema(&memo)); + if (!expected_schema->Equals(*actual_schema, /*check_metadata=*/true)) { + return Status::Invalid("Schemas did not match. Expected:\n", *expected_schema, + "\nActual:\n", *actual_schema); + } return Status::OK(); } Status RunClient(std::unique_ptr client) override { sql::FlightSqlClient sql_client(std::move(client)); - ARROW_RETURN_NOT_OK(ValidateMetadataRetrieval(&sql_client)); - ARROW_RETURN_NOT_OK(ValidateStatementExecution(&sql_client)); - ARROW_RETURN_NOT_OK(ValidatePreparedStatementExecution(&sql_client)); - return Status::OK(); } @@ -611,82 +671,119 @@ class FlightSqlScenario : public Scenario { sql::TableRef pk_table_ref = {"pk_catalog", "pk_db_schema", "pk_table"}; sql::TableRef fk_table_ref = {"fk_catalog", "fk_db_schema", "fk_table"}; - ARROW_RETURN_NOT_OK(Validate(sql::SqlSchema::GetCatalogsSchema(), - sql_client->GetCatalogs(options), sql_client)); + std::unique_ptr info; + std::unique_ptr schema; + + ARROW_ASSIGN_OR_RAISE(info, sql_client->GetCatalogs(options)); + ARROW_ASSIGN_OR_RAISE(schema, sql_client->GetCatalogsSchema(options)); + ARROW_RETURN_NOT_OK(Validate(sql::SqlSchema::GetCatalogsSchema(), *info, sql_client)); + ARROW_RETURN_NOT_OK(ValidateSchema(sql::SqlSchema::GetCatalogsSchema(), *schema)); + + ARROW_ASSIGN_OR_RAISE( + info, sql_client->GetDbSchemas(options, &catalog, &db_schema_filter_pattern)); + ARROW_ASSIGN_OR_RAISE(schema, sql_client->GetDbSchemasSchema(options)); + ARROW_RETURN_NOT_OK( + Validate(sql::SqlSchema::GetDbSchemasSchema(), *info, sql_client)); + ARROW_RETURN_NOT_OK(ValidateSchema(sql::SqlSchema::GetDbSchemasSchema(), *schema)); + + ARROW_ASSIGN_OR_RAISE( + info, sql_client->GetTables(options, &catalog, &db_schema_filter_pattern, + &table_filter_pattern, true, &table_types)); + ARROW_ASSIGN_OR_RAISE(schema, + sql_client->GetTablesSchema(options, /*include_schema=*/true)); + ARROW_RETURN_NOT_OK( + Validate(sql::SqlSchema::GetTablesSchemaWithIncludedSchema(), *info, sql_client)); + ARROW_RETURN_NOT_OK( + ValidateSchema(sql::SqlSchema::GetTablesSchemaWithIncludedSchema(), *schema)); + + ARROW_ASSIGN_OR_RAISE(schema, + sql_client->GetTablesSchema(options, /*include_schema=*/false)); + ARROW_RETURN_NOT_OK(ValidateSchema(sql::SqlSchema::GetTablesSchema(), *schema)); + + ARROW_ASSIGN_OR_RAISE(info, sql_client->GetTableTypes(options)); + ARROW_ASSIGN_OR_RAISE(schema, sql_client->GetTableTypesSchema(options)); + ARROW_RETURN_NOT_OK( + 
Validate(sql::SqlSchema::GetTableTypesSchema(), *info, sql_client)); + ARROW_RETURN_NOT_OK(ValidateSchema(sql::SqlSchema::GetTableTypesSchema(), *schema)); + + ARROW_ASSIGN_OR_RAISE(info, sql_client->GetPrimaryKeys(options, table_ref)); + ARROW_ASSIGN_OR_RAISE(schema, sql_client->GetPrimaryKeysSchema(options)); ARROW_RETURN_NOT_OK( - Validate(sql::SqlSchema::GetDbSchemasSchema(), - sql_client->GetDbSchemas(options, &catalog, &db_schema_filter_pattern), - sql_client)); + Validate(sql::SqlSchema::GetPrimaryKeysSchema(), *info, sql_client)); + ARROW_RETURN_NOT_OK(ValidateSchema(sql::SqlSchema::GetPrimaryKeysSchema(), *schema)); + + ARROW_ASSIGN_OR_RAISE(info, sql_client->GetExportedKeys(options, table_ref)); + ARROW_ASSIGN_OR_RAISE(schema, sql_client->GetExportedKeysSchema(options)); + ARROW_RETURN_NOT_OK( + Validate(sql::SqlSchema::GetExportedKeysSchema(), *info, sql_client)); + ARROW_RETURN_NOT_OK(ValidateSchema(sql::SqlSchema::GetExportedKeysSchema(), *schema)); + + ARROW_ASSIGN_OR_RAISE(info, sql_client->GetImportedKeys(options, table_ref)); + ARROW_ASSIGN_OR_RAISE(schema, sql_client->GetImportedKeysSchema(options)); ARROW_RETURN_NOT_OK( - Validate(sql::SqlSchema::GetTablesSchemaWithIncludedSchema(), - sql_client->GetTables(options, &catalog, &db_schema_filter_pattern, - &table_filter_pattern, true, &table_types), - sql_client)); - ARROW_RETURN_NOT_OK(Validate(sql::SqlSchema::GetTableTypesSchema(), - sql_client->GetTableTypes(options), sql_client)); - ARROW_RETURN_NOT_OK(Validate(sql::SqlSchema::GetPrimaryKeysSchema(), - sql_client->GetPrimaryKeys(options, table_ref), - sql_client)); - ARROW_RETURN_NOT_OK(Validate(sql::SqlSchema::GetExportedKeysSchema(), - sql_client->GetExportedKeys(options, table_ref), - sql_client)); - ARROW_RETURN_NOT_OK(Validate(sql::SqlSchema::GetImportedKeysSchema(), - sql_client->GetImportedKeys(options, table_ref), - sql_client)); - ARROW_RETURN_NOT_OK(Validate( - sql::SqlSchema::GetCrossReferenceSchema(), - sql_client->GetCrossReference(options, pk_table_ref, fk_table_ref), sql_client)); - ARROW_RETURN_NOT_OK(Validate(sql::SqlSchema::GetXdbcTypeInfoSchema(), - sql_client->GetXdbcTypeInfo(options), sql_client)); - ARROW_RETURN_NOT_OK(Validate( - sql::SqlSchema::GetSqlInfoSchema(), - sql_client->GetSqlInfo( - options, {sql::SqlInfoOptions::SqlInfo::FLIGHT_SQL_SERVER_NAME, - sql::SqlInfoOptions::SqlInfo::FLIGHT_SQL_SERVER_READ_ONLY}), - sql_client)); + Validate(sql::SqlSchema::GetImportedKeysSchema(), *info, sql_client)); + ARROW_RETURN_NOT_OK(ValidateSchema(sql::SqlSchema::GetImportedKeysSchema(), *schema)); + + ARROW_ASSIGN_OR_RAISE( + info, sql_client->GetCrossReference(options, pk_table_ref, fk_table_ref)); + ARROW_ASSIGN_OR_RAISE(schema, sql_client->GetCrossReferenceSchema(options)); + ARROW_RETURN_NOT_OK( + Validate(sql::SqlSchema::GetCrossReferenceSchema(), *info, sql_client)); + ARROW_RETURN_NOT_OK( + ValidateSchema(sql::SqlSchema::GetCrossReferenceSchema(), *schema)); + + ARROW_ASSIGN_OR_RAISE(info, sql_client->GetXdbcTypeInfo(options)); + ARROW_ASSIGN_OR_RAISE(schema, sql_client->GetXdbcTypeInfoSchema(options)); + ARROW_RETURN_NOT_OK( + Validate(sql::SqlSchema::GetXdbcTypeInfoSchema(), *info, sql_client)); + ARROW_RETURN_NOT_OK(ValidateSchema(sql::SqlSchema::GetXdbcTypeInfoSchema(), *schema)); + + ARROW_ASSIGN_OR_RAISE( + info, sql_client->GetSqlInfo( + options, {sql::SqlInfoOptions::SqlInfo::FLIGHT_SQL_SERVER_NAME, + sql::SqlInfoOptions::SqlInfo::FLIGHT_SQL_SERVER_READ_ONLY})); + ARROW_ASSIGN_OR_RAISE(schema, sql_client->GetSqlInfoSchema(options)); + 
ARROW_RETURN_NOT_OK(Validate(sql::SqlSchema::GetSqlInfoSchema(), *info, sql_client)); + ARROW_RETURN_NOT_OK(ValidateSchema(sql::SqlSchema::GetSqlInfoSchema(), *schema)); return Status::OK(); } Status ValidateStatementExecution(sql::FlightSqlClient* sql_client) { - FlightCallOptions options; + ARROW_ASSIGN_OR_RAISE(auto info, sql_client->Execute({}, kSelectStatement)); + ARROW_RETURN_NOT_OK(Validate(GetQuerySchema(), *info, sql_client)); - ARROW_RETURN_NOT_OK(Validate( - GetQuerySchema(), sql_client->Execute(options, "SELECT STATEMENT"), sql_client)); - ARROW_ASSIGN_OR_RAISE(auto update_statement_result, - sql_client->ExecuteUpdate(options, "UPDATE STATEMENT")); - if (update_statement_result != kUpdateStatementExpectedRows) { - return Status::Invalid("Expected 'UPDATE STATEMENT' return ", - kUpdateStatementExpectedRows, ", got ", - update_statement_result); - } + ARROW_ASSIGN_OR_RAISE(auto schema, + sql_client->GetExecuteSchema({}, kSelectStatement)); + ARROW_RETURN_NOT_OK(ValidateSchema(GetQuerySchema(), *schema)); + + ARROW_ASSIGN_OR_RAISE(auto updated_rows, + sql_client->ExecuteUpdate({}, "UPDATE STATEMENT")); + ARROW_RETURN_NOT_OK(AssertEq(kUpdateStatementExpectedRows, updated_rows, + "Wrong number of updated rows for ExecuteUpdate")); return Status::OK(); } Status ValidatePreparedStatementExecution(sql::FlightSqlClient* sql_client) { - FlightCallOptions options; - - ARROW_ASSIGN_OR_RAISE(auto select_prepared_statement, - sql_client->Prepare(options, "SELECT PREPARED STATEMENT")); - auto parameters = RecordBatch::Make(GetQuerySchema(), 1, {ArrayFromJSON(int64(), "[1]")}); - ARROW_RETURN_NOT_OK(select_prepared_statement->SetParameters(parameters)); - ARROW_RETURN_NOT_OK( - Validate(GetQuerySchema(), select_prepared_statement->Execute(), sql_client)); + ARROW_ASSIGN_OR_RAISE(auto select_prepared_statement, + sql_client->Prepare({}, "SELECT PREPARED STATEMENT")); + ARROW_RETURN_NOT_OK(select_prepared_statement->SetParameters(parameters)); + ARROW_ASSIGN_OR_RAISE(auto info, select_prepared_statement->Execute()); + ARROW_RETURN_NOT_OK(Validate(GetQuerySchema(), *info, sql_client)); + ARROW_ASSIGN_OR_RAISE(auto schema, select_prepared_statement->GetSchema({})); + ARROW_RETURN_NOT_OK(ValidateSchema(GetQuerySchema(), *schema)); ARROW_RETURN_NOT_OK(select_prepared_statement->Close()); ARROW_ASSIGN_OR_RAISE(auto update_prepared_statement, - sql_client->Prepare(options, "UPDATE PREPARED STATEMENT")); - ARROW_ASSIGN_OR_RAISE(auto update_prepared_statement_result, - update_prepared_statement->ExecuteUpdate()); - if (update_prepared_statement_result != kUpdatePreparedStatementExpectedRows) { - return Status::Invalid("Expected 'UPDATE STATEMENT' return ", - kUpdatePreparedStatementExpectedRows, ", got ", - update_prepared_statement_result); - } + sql_client->Prepare({}, "UPDATE PREPARED STATEMENT")); + ARROW_ASSIGN_OR_RAISE(auto updated_rows, update_prepared_statement->ExecuteUpdate()); + ARROW_RETURN_NOT_OK( + AssertEq(kUpdatePreparedStatementExpectedRows, updated_rows, + "Wrong number of updated rows for prepared statement ExecuteUpdate")); ARROW_RETURN_NOT_OK(update_prepared_statement->Close()); return Status::OK(); diff --git a/cpp/src/arrow/flight/sql/client.cc b/cpp/src/arrow/flight/sql/client.cc index 10ff1eea6f4cb..e299b7ceb11d4 100644 --- a/cpp/src/arrow/flight/sql/client.cc +++ b/cpp/src/arrow/flight/sql/client.cc @@ -36,15 +36,45 @@ namespace arrow { namespace flight { namespace sql { +namespace { +arrow::Result GetFlightDescriptorForCommand( + const google::protobuf::Message& command) { + 
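+  // The command is wrapped in a google.protobuf.Any and serialized; the raw
+  // bytes become the opaque cmd payload of a CMD-type FlightDescriptor.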
google::protobuf::Any any; + if (!any.PackFrom(command)) { + return Status::SerializationError("Failed to pack ", command.GetTypeName()); + } + + std::string buf; + if (!any.SerializeToString(&buf)) { + return Status::SerializationError("Failed to serialize ", command.GetTypeName()); + } + return FlightDescriptor::Command(buf); +} + +arrow::Result> GetFlightInfoForCommand( + FlightSqlClient* client, const FlightCallOptions& options, + const google::protobuf::Message& command) { + ARROW_ASSIGN_OR_RAISE(FlightDescriptor descriptor, + GetFlightDescriptorForCommand(command)); + return client->GetFlightInfo(options, descriptor); +} + +arrow::Result> GetSchemaForCommand( + FlightSqlClient* client, const FlightCallOptions& options, + const google::protobuf::Message& command) { + ARROW_ASSIGN_OR_RAISE(FlightDescriptor descriptor, + GetFlightDescriptorForCommand(command)); + return client->GetSchema(options, descriptor); +} +} // namespace + FlightSqlClient::FlightSqlClient(std::shared_ptr client) : impl_(std::move(client)) {} PreparedStatement::PreparedStatement(FlightSqlClient* client, std::string handle, std::shared_ptr dataset_schema, - std::shared_ptr parameter_schema, - FlightCallOptions options) + std::shared_ptr parameter_schema) : client_(client), - options_(std::move(options)), handle_(std::move(handle)), dataset_schema_(std::move(dataset_schema)), parameter_schema_(std::move(parameter_schema)), @@ -59,30 +89,20 @@ PreparedStatement::~PreparedStatement() { } } -inline FlightDescriptor GetFlightDescriptorForCommand( - const google::protobuf::Message& command) { - google::protobuf::Any any; - any.PackFrom(command); - - const std::string& string = any.SerializeAsString(); - return FlightDescriptor::Command(string); -} - -arrow::Result> GetFlightInfoForCommand( - FlightSqlClient& client, const FlightCallOptions& options, - const google::protobuf::Message& command) { - const FlightDescriptor& descriptor = GetFlightDescriptorForCommand(command); +arrow::Result> FlightSqlClient::Execute( + const FlightCallOptions& options, const std::string& query) { + flight_sql_pb::CommandStatementQuery command; + command.set_query(query); - ARROW_ASSIGN_OR_RAISE(auto flight_info, client.GetFlightInfo(options, descriptor)); - return std::move(flight_info); + return GetFlightInfoForCommand(this, options, command); } -arrow::Result> FlightSqlClient::Execute( +arrow::Result> FlightSqlClient::GetExecuteSchema( const FlightCallOptions& options, const std::string& query) { flight_sql_pb::CommandStatementQuery command; command.set_query(query); - return GetFlightInfoForCommand(*this, options, command); + return GetSchemaForCommand(this, options, command); } arrow::Result FlightSqlClient::ExecuteUpdate(const FlightCallOptions& options, @@ -90,7 +110,8 @@ arrow::Result FlightSqlClient::ExecuteUpdate(const FlightCallOptions& o flight_sql_pb::CommandStatementUpdate command; command.set_query(query); - const FlightDescriptor& descriptor = GetFlightDescriptorForCommand(command); + ARROW_ASSIGN_OR_RAISE(FlightDescriptor descriptor, + GetFlightDescriptorForCommand(command)); std::unique_ptr writer; std::unique_ptr reader; @@ -114,8 +135,13 @@ arrow::Result FlightSqlClient::ExecuteUpdate(const FlightCallOptions& o arrow::Result> FlightSqlClient::GetCatalogs( const FlightCallOptions& options) { flight_sql_pb::CommandGetCatalogs command; + return GetFlightInfoForCommand(this, options, command); +} - return GetFlightInfoForCommand(*this, options, command); +arrow::Result> FlightSqlClient::GetCatalogsSchema( + const 
FlightCallOptions& options) { + flight_sql_pb::CommandGetCatalogs command; + return GetSchemaForCommand(this, options, command); } arrow::Result> FlightSqlClient::GetDbSchemas( @@ -129,7 +155,13 @@ arrow::Result> FlightSqlClient::GetDbSchemas( command.set_db_schema_filter_pattern(*db_schema_filter_pattern); } - return GetFlightInfoForCommand(*this, options, command); + return GetFlightInfoForCommand(this, options, command); +} + +arrow::Result> FlightSqlClient::GetDbSchemasSchema( + const FlightCallOptions& options) { + flight_sql_pb::CommandGetDbSchemas command; + return GetSchemaForCommand(this, options, command); } arrow::Result> FlightSqlClient::GetTables( @@ -158,7 +190,14 @@ arrow::Result> FlightSqlClient::GetTables( } } - return GetFlightInfoForCommand(*this, options, command); + return GetFlightInfoForCommand(this, options, command); +} + +arrow::Result> FlightSqlClient::GetTablesSchema( + const FlightCallOptions& options, bool include_schema) { + flight_sql_pb::CommandGetTables command; + command.set_include_schema(include_schema); + return GetSchemaForCommand(this, options, command); } arrow::Result> FlightSqlClient::GetPrimaryKeys( @@ -175,7 +214,13 @@ arrow::Result> FlightSqlClient::GetPrimaryKeys( command.set_table(table_ref.table); - return GetFlightInfoForCommand(*this, options, command); + return GetFlightInfoForCommand(this, options, command); +} + +arrow::Result> FlightSqlClient::GetPrimaryKeysSchema( + const FlightCallOptions& options) { + flight_sql_pb::CommandGetPrimaryKeys command; + return GetSchemaForCommand(this, options, command); } arrow::Result> FlightSqlClient::GetExportedKeys( @@ -192,7 +237,13 @@ arrow::Result> FlightSqlClient::GetExportedKeys( command.set_table(table_ref.table); - return GetFlightInfoForCommand(*this, options, command); + return GetFlightInfoForCommand(this, options, command); +} + +arrow::Result> FlightSqlClient::GetExportedKeysSchema( + const FlightCallOptions& options) { + flight_sql_pb::CommandGetExportedKeys command; + return GetSchemaForCommand(this, options, command); } arrow::Result> FlightSqlClient::GetImportedKeys( @@ -209,7 +260,13 @@ arrow::Result> FlightSqlClient::GetImportedKeys( command.set_table(table_ref.table); - return GetFlightInfoForCommand(*this, options, command); + return GetFlightInfoForCommand(this, options, command); +} + +arrow::Result> FlightSqlClient::GetImportedKeysSchema( + const FlightCallOptions& options) { + flight_sql_pb::CommandGetImportedKeys command; + return GetSchemaForCommand(this, options, command); } arrow::Result> FlightSqlClient::GetCrossReference( @@ -233,21 +290,33 @@ arrow::Result> FlightSqlClient::GetCrossReference( } command.set_fk_table(fk_table_ref.table); - return GetFlightInfoForCommand(*this, options, command); + return GetFlightInfoForCommand(this, options, command); +} + +arrow::Result> FlightSqlClient::GetCrossReferenceSchema( + const FlightCallOptions& options) { + flight_sql_pb::CommandGetCrossReference command; + return GetSchemaForCommand(this, options, command); } arrow::Result> FlightSqlClient::GetTableTypes( const FlightCallOptions& options) { flight_sql_pb::CommandGetTableTypes command; - return GetFlightInfoForCommand(*this, options, command); + return GetFlightInfoForCommand(this, options, command); +} + +arrow::Result> FlightSqlClient::GetTableTypesSchema( + const FlightCallOptions& options) { + flight_sql_pb::CommandGetTableTypes command; + return GetSchemaForCommand(this, options, command); } arrow::Result> FlightSqlClient::GetXdbcTypeInfo( const FlightCallOptions& 
options) { flight_sql_pb::CommandGetXdbcTypeInfo command; - return GetFlightInfoForCommand(*this, options, command); + return GetFlightInfoForCommand(this, options, command); } arrow::Result> FlightSqlClient::GetXdbcTypeInfo( @@ -256,7 +325,27 @@ arrow::Result> FlightSqlClient::GetXdbcTypeInfo( command.set_data_type(data_type); - return GetFlightInfoForCommand(*this, options, command); + return GetFlightInfoForCommand(this, options, command); +} + +arrow::Result> FlightSqlClient::GetXdbcTypeInfoSchema( + const FlightCallOptions& options) { + flight_sql_pb::CommandGetXdbcTypeInfo command; + return GetSchemaForCommand(this, options, command); +} + +arrow::Result> FlightSqlClient::GetSqlInfo( + const FlightCallOptions& options, const std::vector& sql_info) { + flight_sql_pb::CommandGetSqlInfo command; + for (const int& info : sql_info) command.add_info(info); + + return GetFlightInfoForCommand(this, options, command); +} + +arrow::Result> FlightSqlClient::GetSqlInfoSchema( + const FlightCallOptions& options) { + flight_sql_pb::CommandGetSqlInfo command; + return GetSchemaForCommand(this, options, command); } arrow::Result> FlightSqlClient::DoGet( @@ -319,28 +408,24 @@ arrow::Result> FlightSqlClient::Prepare( auto handle = prepared_statement_result.prepared_statement_handle(); return std::make_shared(this, handle, dataset_schema, - parameter_schema, options); + parameter_schema); } -arrow::Result> PreparedStatement::Execute() { +arrow::Result> PreparedStatement::Execute( + const FlightCallOptions& options) { if (is_closed_) { return Status::Invalid("Statement already closed."); } - flight_sql_pb::CommandPreparedStatementQuery execute_query_command; - - execute_query_command.set_prepared_statement_handle(handle_); - - google::protobuf::Any any; - any.PackFrom(execute_query_command); - - const std::string& string = any.SerializeAsString(); - const FlightDescriptor descriptor = FlightDescriptor::Command(string); + flight_sql_pb::CommandPreparedStatementQuery command; + command.set_prepared_statement_handle(handle_); + ARROW_ASSIGN_OR_RAISE(FlightDescriptor descriptor, + GetFlightDescriptorForCommand(command)); if (parameter_binding_ && parameter_binding_->num_rows() > 0) { std::unique_ptr writer; std::unique_ptr reader; - ARROW_RETURN_NOT_OK(client_->DoPut(options_, descriptor, parameter_binding_->schema(), + ARROW_RETURN_NOT_OK(client_->DoPut(options, descriptor, parameter_binding_->schema(), &writer, &reader)); ARROW_RETURN_NOT_OK(writer->WriteRecordBatch(*parameter_binding_)); @@ -350,28 +435,30 @@ arrow::Result> PreparedStatement::Execute() { ARROW_RETURN_NOT_OK(reader->ReadMetadata(&buffer)); } - ARROW_ASSIGN_OR_RAISE(auto flight_info, client_->GetFlightInfo(options_, descriptor)); + ARROW_ASSIGN_OR_RAISE(auto flight_info, client_->GetFlightInfo(options, descriptor)); return std::move(flight_info); } -arrow::Result PreparedStatement::ExecuteUpdate() { +arrow::Result PreparedStatement::ExecuteUpdate( + const FlightCallOptions& options) { if (is_closed_) { return Status::Invalid("Statement already closed."); } flight_sql_pb::CommandPreparedStatementUpdate command; command.set_prepared_statement_handle(handle_); - const FlightDescriptor& descriptor = GetFlightDescriptorForCommand(command); + ARROW_ASSIGN_OR_RAISE(FlightDescriptor descriptor, + GetFlightDescriptorForCommand(command)); std::unique_ptr writer; std::unique_ptr reader; if (parameter_binding_ && parameter_binding_->num_rows() > 0) { - ARROW_RETURN_NOT_OK(client_->DoPut(options_, descriptor, parameter_binding_->schema(), + 
ARROW_RETURN_NOT_OK(client_->DoPut(options, descriptor, parameter_binding_->schema(), &writer, &reader)); ARROW_RETURN_NOT_OK(writer->WriteRecordBatch(*parameter_binding_)); } else { const std::shared_ptr schema = arrow::schema({}); - ARROW_RETURN_NOT_OK(client_->DoPut(options_, descriptor, schema, &writer, &reader)); + ARROW_RETURN_NOT_OK(client_->DoPut(options, descriptor, schema, &writer, &reader)); const ArrayVector columns; const auto& record_batch = arrow::RecordBatch::Make(schema, 0, columns); ARROW_RETURN_NOT_OK(writer->WriteRecordBatch(*record_batch)); @@ -406,7 +493,20 @@ std::shared_ptr PreparedStatement::parameter_schema() const { return parameter_schema_; } -Status PreparedStatement::Close() { +arrow::Result> PreparedStatement::GetSchema( + const FlightCallOptions& options) { + if (is_closed_) { + return Status::Invalid("Statement already closed"); + } + + flight_sql_pb::CommandPreparedStatementQuery command; + command.set_prepared_statement_handle(handle_); + ARROW_ASSIGN_OR_RAISE(FlightDescriptor descriptor, + GetFlightDescriptorForCommand(command)); + return client_->GetSchema(options, descriptor); +} + +Status PreparedStatement::Close(const FlightCallOptions& options) { if (is_closed_) { return Status::Invalid("Statement already closed."); } @@ -422,7 +522,7 @@ Status PreparedStatement::Close() { std::unique_ptr results; - ARROW_RETURN_NOT_OK(client_->DoAction(options_, action, &results)); + ARROW_RETURN_NOT_OK(client_->DoAction(options, action, &results)); is_closed_ = true; @@ -431,14 +531,6 @@ Status PreparedStatement::Close() { Status FlightSqlClient::Close() { return impl_->Close(); } -arrow::Result> FlightSqlClient::GetSqlInfo( - const FlightCallOptions& options, const std::vector& sql_info) { - flight_sql_pb::CommandGetSqlInfo command; - for (const int& info : sql_info) command.add_info(info); - - return GetFlightInfoForCommand(*this, options, command); -} - } // namespace sql } // namespace flight } // namespace arrow diff --git a/cpp/src/arrow/flight/sql/client.h b/cpp/src/arrow/flight/sql/client.h index 7c8cb640e8d13..26315e0d234fe 100644 --- a/cpp/src/arrow/flight/sql/client.h +++ b/cpp/src/arrow/flight/sql/client.h @@ -54,6 +54,10 @@ class ARROW_FLIGHT_SQL_EXPORT FlightSqlClient { arrow::Result> Execute(const FlightCallOptions& options, const std::string& query); + /// \brief Get the result set schema from the server. + arrow::Result> GetExecuteSchema( + const FlightCallOptions& options, const std::string& query); + /// \brief Execute an update query on the server. /// \param[in] options RPC-layer hints for this call. /// \param[in] query The query to be executed in the UTF-8 format. @@ -67,6 +71,11 @@ class ARROW_FLIGHT_SQL_EXPORT FlightSqlClient { arrow::Result> GetCatalogs( const FlightCallOptions& options); + /// \brief Get the catalogs schema from the server (should be + /// identical to SqlSchema::GetCatalogsSchema). + arrow::Result> GetCatalogsSchema( + const FlightCallOptions& options); + /// \brief Request a list of database schemas. /// \param[in] options RPC-layer hints for this call. /// \param[in] catalog The catalog. @@ -76,6 +85,11 @@ class ARROW_FLIGHT_SQL_EXPORT FlightSqlClient { const FlightCallOptions& options, const std::string* catalog, const std::string* db_schema_filter_pattern); + /// \brief Get the database schemas schema from the server (should be + /// identical to SqlSchema::GetDbSchemasSchema). 
+ arrow::Result> GetDbSchemasSchema( + const FlightCallOptions& options); + /// \brief Given a flight ticket and schema, request to be sent the /// stream. Returns record batch stream reader /// \param[in] options Per-RPC options @@ -99,6 +113,11 @@ class ARROW_FLIGHT_SQL_EXPORT FlightSqlClient { const std::string* table_filter_pattern, bool include_schema, const std::vector* table_types); + /// \brief Get the tables schema from the server (should be + /// identical to SqlSchema::GetTablesSchema). + arrow::Result> GetTablesSchema( + const FlightCallOptions& options, bool include_schema); + /// \brief Request the primary keys for a table. /// \param[in] options RPC-layer hints for this call. /// \param[in] table_ref The table reference. @@ -106,6 +125,11 @@ class ARROW_FLIGHT_SQL_EXPORT FlightSqlClient { arrow::Result> GetPrimaryKeys( const FlightCallOptions& options, const TableRef& table_ref); + /// \brief Get the primary keys schema from the server (should be + /// identical to SqlSchema::GetPrimaryKeysSchema). + arrow::Result> GetPrimaryKeysSchema( + const FlightCallOptions& options); + /// \brief Retrieves a description about the foreign key columns that reference the /// primary key columns of the given table. /// \param[in] options RPC-layer hints for this call. @@ -114,6 +138,11 @@ class ARROW_FLIGHT_SQL_EXPORT FlightSqlClient { arrow::Result> GetExportedKeys( const FlightCallOptions& options, const TableRef& table_ref); + /// \brief Get the exported keys schema from the server (should be + /// identical to SqlSchema::GetExportedKeysSchema). + arrow::Result> GetExportedKeysSchema( + const FlightCallOptions& options); + /// \brief Retrieves the foreign key columns for the given table. /// \param[in] options RPC-layer hints for this call. /// \param[in] table_ref The table reference. @@ -121,6 +150,11 @@ class ARROW_FLIGHT_SQL_EXPORT FlightSqlClient { arrow::Result> GetImportedKeys( const FlightCallOptions& options, const TableRef& table_ref); + /// \brief Get the imported keys schema from the server (should be + /// identical to SqlSchema::GetImportedKeysSchema). + arrow::Result> GetImportedKeysSchema( + const FlightCallOptions& options); + /// \brief Retrieves a description of the foreign key columns in the given foreign key /// table that reference the primary key or the columns representing a unique /// constraint of the parent table (could be the same or a different table). @@ -132,12 +166,22 @@ class ARROW_FLIGHT_SQL_EXPORT FlightSqlClient { const FlightCallOptions& options, const TableRef& pk_table_ref, const TableRef& fk_table_ref); + /// \brief Get the cross reference schema from the server (should be + /// identical to SqlSchema::GetCrossReferenceSchema). + arrow::Result> GetCrossReferenceSchema( + const FlightCallOptions& options); + /// \brief Request a list of table types. /// \param[in] options RPC-layer hints for this call. /// \return The FlightInfo describing where to access the dataset. arrow::Result> GetTableTypes( const FlightCallOptions& options); + /// \brief Get the table types schema from the server (should be + /// identical to SqlSchema::GetTableTypesSchema). + arrow::Result> GetTableTypesSchema( + const FlightCallOptions& options); + /// \brief Request the information about all the data types supported. /// \param[in] options RPC-layer hints for this call. /// \return The FlightInfo describing where to access the dataset. 
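A condensed sketch of the client-side round trip these schema accessors enable
(illustrative only; error handling is elided, `sql_client` is assumed to be an
already-connected FlightSqlClient, `options` a FlightCallOptions instance, and
the calls are assumed to run inside a Status-returning function):

  ARROW_ASSIGN_OR_RAISE(std::unique_ptr<arrow::flight::SchemaResult> schema_result,
                        sql_client.GetCatalogsSchema(options));
  arrow::ipc::DictionaryMemo memo;
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Schema> schema,
                        schema_result->GetSchema(&memo));
  // `schema` is expected to match SqlSchema::GetCatalogsSchema().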
@@ -151,6 +195,11 @@ class ARROW_FLIGHT_SQL_EXPORT FlightSqlClient { arrow::Result> GetXdbcTypeInfo( const FlightCallOptions& options, int data_type); + /// \brief Get the type info schema from the server (should be + /// identical to SqlSchema::GetXdbcTypeInfoSchema). + arrow::Result> GetXdbcTypeInfoSchema( + const FlightCallOptions& options); + /// \brief Request a list of SQL information. /// \param[in] options RPC-layer hints for this call. /// \param[in] sql_info the SQL info required. @@ -158,6 +207,11 @@ class ARROW_FLIGHT_SQL_EXPORT FlightSqlClient { arrow::Result> GetSqlInfo(const FlightCallOptions& options, const std::vector& sql_info); + /// \brief Get the SQL information schema from the server (should be + /// identical to SqlSchema::GetSqlInfoSchema). + arrow::Result> GetSqlInfoSchema( + const FlightCallOptions& options); + /// \brief Create a prepared statement object. /// \param[in] options RPC-layer hints for this call. /// \param[in] query The query that will be executed. @@ -165,17 +219,18 @@ class ARROW_FLIGHT_SQL_EXPORT FlightSqlClient { arrow::Result> Prepare( const FlightCallOptions& options, const std::string& query); - /// \brief Retrieve the FlightInfo. - /// \param[in] options RPC-layer hints for this call. - /// \param[in] descriptor The flight descriptor. - /// \return The flight info with the metadata. - // NOTE: This is public because it is been used by the anonymous - // function GetFlightInfoForCommand. + /// \brief Call the underlying Flight client's GetFlightInfo. virtual arrow::Result> GetFlightInfo( const FlightCallOptions& options, const FlightDescriptor& descriptor) { return impl_->GetFlightInfo(options, descriptor); } + /// \brief Call the underlying Flight client's GetSchema. + virtual arrow::Result> GetSchema( + const FlightCallOptions& options, const FlightDescriptor& descriptor) { + return impl_->GetSchema(options, descriptor); + } + /// \brief Explicitly shut down and clean up the client. Status Close(); @@ -212,10 +267,9 @@ class ARROW_FLIGHT_SQL_EXPORT PreparedStatement { /// \param[in] handle Handle for this prepared statement. /// \param[in] dataset_schema Schema of the resulting dataset. /// \param[in] parameter_schema Schema of the parameters (if any). - /// \param[in] options RPC-layer hints for this call. PreparedStatement(FlightSqlClient* client, std::string handle, std::shared_ptr dataset_schema, - std::shared_ptr parameter_schema, FlightCallOptions options); + std::shared_ptr parameter_schema); /// \brief Default destructor for the PreparedStatement class. /// The destructor will call the Close method from the class in order, @@ -226,11 +280,12 @@ class ARROW_FLIGHT_SQL_EXPORT PreparedStatement { /// \brief Executes the prepared statement query on the server. /// \return A FlightInfo object representing the stream(s) to fetch. - arrow::Result> Execute(); + arrow::Result> Execute( + const FlightCallOptions& options = {}); /// \brief Executes the prepared statement update query on the server. /// \return The number of rows affected. - arrow::Result ExecuteUpdate(); + arrow::Result ExecuteUpdate(const FlightCallOptions& options = {}); /// \brief Retrieve the parameter schema from the query. /// \return The parameter schema from the query. @@ -245,10 +300,15 @@ class ARROW_FLIGHT_SQL_EXPORT PreparedStatement { /// \return Status. Status SetParameters(std::shared_ptr parameter_binding); + /// \brief Re-request the result set schema from the server (should + /// be identical to dataset_schema). 
+ arrow::Result> GetSchema( + const FlightCallOptions& options = {}); + /// \brief Close the prepared statement, so that this PreparedStatement can not used /// anymore and server can free up any resources. /// \return Status. - Status Close(); + Status Close(const FlightCallOptions& options = {}); /// \brief Check if the prepared statement is closed. /// \return The state of the prepared statement. @@ -256,7 +316,6 @@ class ARROW_FLIGHT_SQL_EXPORT PreparedStatement { private: FlightSqlClient* client_; - FlightCallOptions options_; std::string handle_; std::shared_ptr dataset_schema_; std::shared_ptr parameter_schema_; diff --git a/cpp/src/arrow/flight/sql/server.cc b/cpp/src/arrow/flight/sql/server.cc index 0ebe647ba1490..78fbff0c33a4e 100644 --- a/cpp/src/arrow/flight/sql/server.cc +++ b/cpp/src/arrow/flight/sql/server.cc @@ -344,6 +344,72 @@ Status FlightSqlServerBase::GetFlightInfo(const ServerCallContext& context, return Status::Invalid("The defined request is invalid."); } +Status FlightSqlServerBase::GetSchema(const ServerCallContext& context, + const FlightDescriptor& request, + std::unique_ptr* schema) { + google::protobuf::Any any; + if (!any.ParseFromArray(request.cmd.data(), static_cast(request.cmd.size()))) { + return Status::Invalid("Unable to parse command"); + } + + if (any.Is()) { + ARROW_ASSIGN_OR_RAISE(StatementQuery internal_command, + ParseCommandStatementQuery(any)); + ARROW_ASSIGN_OR_RAISE(*schema, + GetSchemaStatement(context, internal_command, request)); + return Status::OK(); + } else if (any.Is()) { + ARROW_ASSIGN_OR_RAISE(PreparedStatementQuery internal_command, + ParseCommandPreparedStatementQuery(any)); + ARROW_ASSIGN_OR_RAISE(*schema, + GetSchemaPreparedStatement(context, internal_command, request)); + return Status::OK(); + } else if (any.Is()) { + ARROW_ASSIGN_OR_RAISE(*schema, SchemaResult::Make(*SqlSchema::GetCatalogsSchema())); + return Status::OK(); + } else if (any.Is()) { + ARROW_ASSIGN_OR_RAISE(*schema, + SchemaResult::Make(*SqlSchema::GetCrossReferenceSchema())); + return Status::OK(); + } else if (any.Is()) { + ARROW_ASSIGN_OR_RAISE(*schema, SchemaResult::Make(*SqlSchema::GetDbSchemasSchema())); + return Status::OK(); + } else if (any.Is()) { + ARROW_ASSIGN_OR_RAISE(*schema, + SchemaResult::Make(*SqlSchema::GetExportedKeysSchema())); + return Status::OK(); + } else if (any.Is()) { + ARROW_ASSIGN_OR_RAISE(*schema, + SchemaResult::Make(*SqlSchema::GetImportedKeysSchema())); + return Status::OK(); + } else if (any.Is()) { + ARROW_ASSIGN_OR_RAISE(*schema, + SchemaResult::Make(*SqlSchema::GetPrimaryKeysSchema())); + return Status::OK(); + } else if (any.Is()) { + ARROW_ASSIGN_OR_RAISE(*schema, SchemaResult::Make(*SqlSchema::GetSqlInfoSchema())); + return Status::OK(); + } else if (any.Is()) { + ARROW_ASSIGN_OR_RAISE(GetTables command, ParseCommandGetTables(any)); + if (command.include_schema) { + ARROW_ASSIGN_OR_RAISE( + *schema, SchemaResult::Make(*SqlSchema::GetTablesSchemaWithIncludedSchema())); + } else { + ARROW_ASSIGN_OR_RAISE(*schema, SchemaResult::Make(*SqlSchema::GetTablesSchema())); + } + return Status::OK(); + } else if (any.Is()) { + ARROW_ASSIGN_OR_RAISE(*schema, SchemaResult::Make(*SqlSchema::GetTableTypesSchema())); + return Status::OK(); + } else if (any.Is()) { + ARROW_ASSIGN_OR_RAISE(*schema, + SchemaResult::Make(*SqlSchema::GetXdbcTypeInfoSchema())); + return Status::OK(); + } + + return Status::NotImplemented("Command not recognized: ", any.type_url()); +} + Status FlightSqlServerBase::DoGet(const ServerCallContext& context, const 
Ticket& request, std::unique_ptr* stream) { google::protobuf::Any any; @@ -531,6 +597,12 @@ arrow::Result> FlightSqlServerBase::GetFlightInfoSta return Status::NotImplemented("GetFlightInfoStatement not implemented"); } +arrow::Result> FlightSqlServerBase::GetSchemaStatement( + const ServerCallContext& context, const StatementQuery& command, + const FlightDescriptor& descriptor) { + return Status::NotImplemented("GetSchemaStatement not implemented"); +} + arrow::Result> FlightSqlServerBase::DoGetStatement( const ServerCallContext& context, const StatementQueryTicket& command) { return Status::NotImplemented("DoGetStatement not implemented"); @@ -543,6 +615,13 @@ FlightSqlServerBase::GetFlightInfoPreparedStatement(const ServerCallContext& con return Status::NotImplemented("GetFlightInfoPreparedStatement not implemented"); } +arrow::Result> +FlightSqlServerBase::GetSchemaPreparedStatement(const ServerCallContext& context, + const PreparedStatementQuery& command, + const FlightDescriptor& descriptor) { + return Status::NotImplemented("GetSchemaPreparedStatement not implemented"); +} + arrow::Result> FlightSqlServerBase::DoGetPreparedStatement(const ServerCallContext& context, const PreparedStatementQuery& command) { diff --git a/cpp/src/arrow/flight/sql/server.h b/cpp/src/arrow/flight/sql/server.h index f077c5d5d5d1f..49e239a0cddd4 100644 --- a/cpp/src/arrow/flight/sql/server.h +++ b/cpp/src/arrow/flight/sql/server.h @@ -28,6 +28,7 @@ #include "arrow/flight/sql/server.h" #include "arrow/flight/sql/types.h" #include "arrow/flight/sql/visibility.h" +#include "arrow/flight/types.h" #include "arrow/util/optional.h" namespace arrow { @@ -221,6 +222,25 @@ class ARROW_FLIGHT_SQL_EXPORT FlightSqlServerBase : public FlightServerBase { virtual arrow::Result> GetFlightInfoCatalogs( const ServerCallContext& context, const FlightDescriptor& descriptor); + /// \brief Get the schema of the result set of a query. + /// \param[in] context Per-call context. + /// \param[in] command The StatementQuery containing the SQL query. + /// \param[in] descriptor The descriptor identifying the data stream. + /// \return The schema of the result set. + virtual arrow::Result> GetSchemaStatement( + const ServerCallContext& context, const StatementQuery& command, + const FlightDescriptor& descriptor); + + /// \brief Get the schema of the result set of a prepared statement. + /// \param[in] context Per-call context. + /// \param[in] command The PreparedStatementQuery containing the + /// prepared statement handle. + /// \param[in] descriptor The descriptor identifying the data stream. + /// \return The schema of the result set. + virtual arrow::Result> GetSchemaPreparedStatement( + const ServerCallContext& context, const PreparedStatementQuery& command, + const FlightDescriptor& descriptor); + /// \brief Get a FlightDataStream containing the list of catalogs. /// \param[in] context Per-call context. /// \return An interface for sending data back to the client. 
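A sketch of how a server could override the new GetSchemaStatement hook declared above (the base-class default returns NotImplemented); the subclass name and the fixed single-column schema are placeholders, since a real implementation would derive the schema from the incoming query:

// Sketch of a FlightSqlServerBase subclass overriding GetSchemaStatement.
// The returned schema is a stand-in (one int64 column named "id").
#include <memory>

#include "arrow/api.h"
#include "arrow/flight/sql/server.h"

class ExampleSqlServer : public arrow::flight::sql::FlightSqlServerBase {
 public:
  arrow::Result<std::unique_ptr<arrow::flight::SchemaResult>> GetSchemaStatement(
      const arrow::flight::ServerCallContext& context,
      const arrow::flight::sql::StatementQuery& command,
      const arrow::flight::FlightDescriptor& descriptor) override {
    // A real server would plan the statement in `command` and report the
    // schema its result set will have.
    std::shared_ptr<arrow::Schema> schema =
        arrow::schema({arrow::field("id", arrow::int64())});
    return arrow::flight::SchemaResult::Make(*schema);
  }
};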
@@ -462,6 +482,9 @@ class ARROW_FLIGHT_SQL_EXPORT FlightSqlServerBase : public FlightServerBase { Status GetFlightInfo(const ServerCallContext& context, const FlightDescriptor& request, std::unique_ptr* info) final; + Status GetSchema(const ServerCallContext& context, const FlightDescriptor& request, + std::unique_ptr* schema) override; + Status DoGet(const ServerCallContext& context, const Ticket& request, std::unique_ptr* stream) final; diff --git a/cpp/src/arrow/flight/types.cc b/cpp/src/arrow/flight/types.cc index ddb8a036fbc42..6e80f40cfbf38 100644 --- a/cpp/src/arrow/flight/types.cc +++ b/cpp/src/arrow/flight/types.cc @@ -28,6 +28,7 @@ #include "arrow/ipc/reader.h" #include "arrow/status.h" #include "arrow/table.h" +#include "arrow/util/make_unique.h" #include "arrow/util/string_view.h" #include "arrow/util/uri.h" @@ -150,10 +151,10 @@ arrow::Result> SchemaResult::GetSchema( return ipc::ReadSchema(&schema_reader, dictionary_memo); } -arrow::Result SchemaResult::Make(const Schema& schema) { +arrow::Result> SchemaResult::Make(const Schema& schema) { std::string schema_in; RETURN_NOT_OK(internal::SchemaToString(schema, &schema_in)); - return SchemaResult(std::move(schema_in)); + return arrow::internal::make_unique(std::move(schema_in)); } Status SchemaResult::GetSchema(ipc::DictionaryMemo* dictionary_memo, diff --git a/cpp/src/arrow/flight/types.h b/cpp/src/arrow/flight/types.h index a061f33afec0b..2ec24ff586851 100644 --- a/cpp/src/arrow/flight/types.h +++ b/cpp/src/arrow/flight/types.h @@ -397,7 +397,7 @@ struct ARROW_FLIGHT_EXPORT SchemaResult { explicit SchemaResult(std::string schema) : raw_schema_(std::move(schema)) {} /// \brief Factory method to construct a SchemaResult. - static arrow::Result Make(const Schema& schema); + static arrow::Result> Make(const Schema& schema); /// \brief return schema /// \param[in,out] dictionary_memo for dictionary bookkeeping, will diff --git a/cpp/src/arrow/gpu/cuda_context.h b/cpp/src/arrow/gpu/cuda_context.h index 00bcf94c8b6ab..0115ed19a103d 100644 --- a/cpp/src/arrow/gpu/cuda_context.h +++ b/cpp/src/arrow/gpu/cuda_context.h @@ -76,7 +76,7 @@ class ARROW_EXPORT CudaDeviceManager { static std::unique_ptr instance_; class Impl; - std::unique_ptr impl_; + std::shared_ptr impl_; friend class CudaContext; friend class CudaDevice; @@ -146,7 +146,7 @@ class ARROW_EXPORT CudaDevice : public Device { /// \endcond explicit CudaDevice(Impl); - std::unique_ptr impl_; + std::shared_ptr impl_; }; /// \brief Return whether a device instance is a CudaDevice @@ -297,7 +297,7 @@ class ARROW_EXPORT CudaContext : public std::enable_shared_from_this impl_; + std::shared_ptr impl_; friend class CudaBuffer; friend class CudaBufferReader; diff --git a/cpp/src/arrow/gpu/cuda_internal.h b/cpp/src/arrow/gpu/cuda_internal.h index 25eb6e06cee40..d70873634f869 100644 --- a/cpp/src/arrow/gpu/cuda_internal.h +++ b/cpp/src/arrow/gpu/cuda_internal.h @@ -33,6 +33,7 @@ namespace internal { std::string CudaErrorDescription(CUresult err); +ARROW_EXPORT Status StatusFromCuda(CUresult res, const char* function_name = nullptr); #define CU_RETURN_NOT_OK(FUNC_NAME, STMT) \ diff --git a/cpp/src/arrow/gpu/cuda_memory.h b/cpp/src/arrow/gpu/cuda_memory.h index 4efd3889407e1..18c23a507805a 100644 --- a/cpp/src/arrow/gpu/cuda_memory.h +++ b/cpp/src/arrow/gpu/cuda_memory.h @@ -250,10 +250,12 @@ Result> AllocateCudaHostBuffer(int device_number const int64_t size); /// Low-level: get a device address through which the CPU data be accessed. 
+ARROW_EXPORT Result GetDeviceAddress(const uint8_t* cpu_data, const std::shared_ptr& ctx); /// Low-level: get a CPU address through which the device data be accessed. +ARROW_EXPORT Result GetHostAddress(uintptr_t device_ptr); } // namespace cuda diff --git a/cpp/src/arrow/io/interfaces.cc b/cpp/src/arrow/io/interfaces.cc index 8dbc9bd5ed56f..1dfb0bdf8ad16 100644 --- a/cpp/src/arrow/io/interfaces.cc +++ b/cpp/src/arrow/io/interfaces.cc @@ -247,8 +247,14 @@ class FileSegmentReader int64_t nbytes_; }; -std::shared_ptr RandomAccessFile::GetStream( +Result> RandomAccessFile::GetStream( std::shared_ptr file, int64_t file_offset, int64_t nbytes) { + if (file_offset < 0) { + return Status::Invalid("file_offset should be a positive value, got: ", file_offset); + } + if (nbytes < 0) { + return Status::Invalid("nbytes should be a positive value, got: ", nbytes); + } return std::make_shared(std::move(file), file_offset, nbytes); } diff --git a/cpp/src/arrow/io/interfaces.h b/cpp/src/arrow/io/interfaces.h index 0baffc3c5d40f..70c0dd8520fb6 100644 --- a/cpp/src/arrow/io/interfaces.h +++ b/cpp/src/arrow/io/interfaces.h @@ -262,8 +262,8 @@ class ARROW_EXPORT RandomAccessFile : public InputStream, public Seekable { /// \param[in] file_offset the starting position in the file /// \param[in] nbytes the extent of bytes to read. The file should have /// sufficient bytes available - static std::shared_ptr GetStream(std::shared_ptr file, - int64_t file_offset, int64_t nbytes); + static Result> GetStream( + std::shared_ptr file, int64_t file_offset, int64_t nbytes); /// \brief Return the total file size in bytes. /// diff --git a/cpp/src/arrow/io/memory_test.cc b/cpp/src/arrow/io/memory_test.cc index 50335cb6ac8d0..d361243ad6f53 100644 --- a/cpp/src/arrow/io/memory_test.cc +++ b/cpp/src/arrow/io/memory_test.cc @@ -298,8 +298,8 @@ TEST(TestRandomAccessFile, GetStream) { std::shared_ptr stream1, stream2; - stream1 = RandomAccessFile::GetStream(file, 0, 10); - stream2 = RandomAccessFile::GetStream(file, 9, 16); + ASSERT_OK_AND_ASSIGN(stream1, RandomAccessFile::GetStream(file, 0, 10)); + ASSERT_OK_AND_ASSIGN(stream2, RandomAccessFile::GetStream(file, 9, 16)); ASSERT_OK_AND_EQ(0, stream1->Tell()); diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt index 495018ec09651..4b62bdc3a77f2 100644 --- a/cpp/src/arrow/ipc/CMakeLists.txt +++ b/cpp/src/arrow/ipc/CMakeLists.txt @@ -63,6 +63,10 @@ if(ARROW_BUILD_UTILITIES OR ARROW_BUILD_INTEGRATION) target_link_libraries(arrow-file-to-stream ${ARROW_UTIL_LIB}) add_executable(arrow-stream-to-file stream_to_file.cc) target_link_libraries(arrow-stream-to-file ${ARROW_UTIL_LIB}) + if(ARROW_BUILD_UTILITIES) + install(TARGETS arrow-file-to-stream arrow-stream-to-file ${INSTALL_IS_OPTIONAL} + DESTINATION ${CMAKE_INSTALL_BINDIR}) + endif() if(ARROW_BUILD_INTEGRATION) add_dependencies(arrow-integration arrow-file-to-stream) diff --git a/cpp/src/arrow/public_api_test.cc b/cpp/src/arrow/public_api_test.cc index 45f3313c67f7a..a2aa624d0928a 100644 --- a/cpp/src/arrow/public_api_test.cc +++ b/cpp/src/arrow/public_api_test.cc @@ -50,10 +50,6 @@ #include "arrow/json/api.h" // IWYU pragma: keep #endif -#ifdef ARROW_PYTHON -#include "arrow/python/api.h" // IWYU pragma: keep -#endif - #ifdef DCHECK #error "DCHECK should not be visible from Arrow public headers." 
#endif diff --git a/cpp/src/arrow/python/CMakeLists.txt b/cpp/src/arrow/python/CMakeLists.txt deleted file mode 100644 index c37240a426cfe..0000000000000 --- a/cpp/src/arrow/python/CMakeLists.txt +++ /dev/null @@ -1,208 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# -# arrow_python -# - -find_package(Python3Alt 3.7 REQUIRED) - -add_custom_target(arrow_python-all) -add_custom_target(arrow_python) -add_custom_target(arrow_python-tests) -add_dependencies(arrow_python-all arrow_python arrow_python-tests) - -set(ARROW_PYTHON_SRCS - arrow_to_pandas.cc - benchmark.cc - common.cc - datetime.cc - decimal.cc - deserialize.cc - extension_type.cc - gdb.cc - helpers.cc - inference.cc - init.cc - io.cc - ipc.cc - numpy_convert.cc - numpy_to_arrow.cc - python_to_arrow.cc - pyarrow.cc - serialize.cc - udf.cc) - -set_source_files_properties(init.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON - SKIP_UNITY_BUILD_INCLUSION ON) - -if(ARROW_CSV) - list(APPEND ARROW_PYTHON_SRCS csv.cc) -endif() - -if(ARROW_FILESYSTEM) - list(APPEND ARROW_PYTHON_SRCS filesystem.cc) -endif() - -if(PARQUET_REQUIRE_ENCRYPTION) - list(APPEND ARROW_PYTHON_SRCS parquet_encryption.cc) -endif() - -set(ARROW_PYTHON_DEPENDENCIES arrow_dependencies) - -if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - set_property(SOURCE pyarrow.cc - APPEND_STRING - PROPERTY COMPILE_FLAGS " -Wno-cast-qual ") -endif() - -set(ARROW_PYTHON_SHARED_LINK_LIBS arrow_shared) -set(ARROW_PYTHON_SHARED_PRIVATE_LINK_LIBS) -set(ARROW_PYTHON_STATIC_LINK_LIBS ${PYTHON_OTHER_LIBS}) - -if(WIN32) - list(APPEND ARROW_PYTHON_SHARED_LINK_LIBS ${PYTHON_LIBRARIES} ${PYTHON_OTHER_LIBS}) -endif() -if(PARQUET_REQUIRE_ENCRYPTION) - list(APPEND ARROW_PYTHON_SHARED_LINK_LIBS parquet_shared) -endif() -if(ARROW_USE_XSIMD) - list(APPEND ARROW_PYTHON_SHARED_PRIVATE_LINK_LIBS xsimd) - list(APPEND ARROW_PYTHON_STATIC_LINK_LIBS xsimd) -endif() - -set(ARROW_PYTHON_INCLUDES ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS}) - -add_arrow_lib(arrow_python - CMAKE_PACKAGE_NAME - ArrowPython - PKG_CONFIG_NAME - arrow-python - SOURCES - ${ARROW_PYTHON_SRCS} - PRECOMPILED_HEADERS - "$<$:arrow/python/pch.h>" - OUTPUTS - ARROW_PYTHON_LIBRARIES - DEPENDENCIES - ${ARROW_PYTHON_DEPENDENCIES} - SHARED_LINK_FLAGS - ${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt - SHARED_LINK_LIBS - ${ARROW_PYTHON_SHARED_LINK_LIBS} - SHARED_PRIVATE_LINK_LIBS - ${ARROW_PYTHON_SHARED_PRIVATE_LINK_LIBS} - STATIC_LINK_LIBS - ${ARROW_PYTHON_STATIC_LINK_LIBS} - EXTRA_INCLUDES - "${ARROW_PYTHON_INCLUDES}") - -add_dependencies(arrow_python ${ARROW_PYTHON_LIBRARIES}) - -foreach(LIB_TARGET ${ARROW_PYTHON_LIBRARIES}) - target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_PYTHON_EXPORTING) -endforeach() - -if(ARROW_BUILD_STATIC AND MSVC) 
- target_compile_definitions(arrow_python_static PUBLIC ARROW_STATIC) -endif() - -if(ARROW_FLIGHT AND ARROW_BUILD_SHARED) - # Must link to shared libarrow_flight: we don't want to link more than one - # copy of gRPC into the eventual Cython shared object, otherwise gRPC calls - # fail with weird errors due to multiple copies of global static state (The - # other solution is to link gRPC shared everywhere instead of statically only - # in Flight) - add_arrow_lib(arrow_python_flight - CMAKE_PACKAGE_NAME - ArrowPythonFlight - PKG_CONFIG_NAME - arrow-python-flight - SOURCES - flight.cc - OUTPUTS - ARROW_PYFLIGHT_LIBRARIES - DEPENDENCIES - flight_grpc_gen - SHARED_LINK_FLAGS - ${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt - SHARED_LINK_LIBS - arrow_python_shared - arrow_flight_shared - STATIC_LINK_LIBS - ${PYTHON_OTHER_LIBS} - EXTRA_INCLUDES - "${ARROW_PYTHON_INCLUDES}" - PRIVATE_INCLUDES - "${Protobuf_INCLUDE_DIRS}") - - add_dependencies(arrow_python ${ARROW_PYFLIGHT_LIBRARIES}) - - foreach(LIB_TARGET ${ARROW_PYFLIGHT_LIBRARIES}) - target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_PYFLIGHT_EXPORTING) - endforeach() - - if(ARROW_BUILD_STATIC AND MSVC) - target_compile_definitions(arrow_python_flight_static PUBLIC ARROW_STATIC) - endif() -endif() - -if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - # Clang, be quiet. Python C API has lots of macros - set_property(SOURCE ${ARROW_PYTHON_SRCS} - APPEND_STRING - PROPERTY COMPILE_FLAGS -Wno-parentheses-equality) -endif() - -arrow_install_all_headers("arrow/python") - -# ---------------------------------------------------------------------- - -if(ARROW_BUILD_TESTS) - add_library(arrow_python_test_main STATIC util/test_main.cc) - - target_link_libraries(arrow_python_test_main GTest::gtest) - target_include_directories(arrow_python_test_main SYSTEM - PUBLIC ${ARROW_PYTHON_INCLUDES}) - - if(APPLE) - target_link_libraries(arrow_python_test_main ${CMAKE_DL_LIBS}) - set_target_properties(arrow_python_test_main PROPERTIES LINK_FLAGS - "-undefined dynamic_lookup") - elseif(NOT MSVC) - target_link_libraries(arrow_python_test_main pthread ${CMAKE_DL_LIBS}) - endif() - - if(ARROW_TEST_LINKAGE STREQUAL shared) - set(ARROW_PYTHON_TEST_LINK_LIBS arrow_python_test_main arrow_python_shared - arrow_testing_shared arrow_shared) - else() - set(ARROW_PYTHON_TEST_LINK_LIBS arrow_python_test_main arrow_python_static - arrow_testing_static arrow_static) - endif() - - add_arrow_test(python_test - STATIC_LINK_LIBS - "${ARROW_PYTHON_TEST_LINK_LIBS}" - EXTRA_LINK_LIBS - ${PYTHON_LIBRARIES} - EXTRA_INCLUDES - "${ARROW_PYTHON_INCLUDES}" - LABELS - "arrow_python-tests" - NO_VALGRIND) -endif() diff --git a/cpp/src/arrow/python/api.h b/cpp/src/arrow/python/api.h deleted file mode 100644 index a0b13d6d13013..0000000000000 --- a/cpp/src/arrow/python/api.h +++ /dev/null @@ -1,30 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "arrow/python/arrow_to_pandas.h" -#include "arrow/python/common.h" -#include "arrow/python/datetime.h" -#include "arrow/python/deserialize.h" -#include "arrow/python/helpers.h" -#include "arrow/python/inference.h" -#include "arrow/python/io.h" -#include "arrow/python/numpy_convert.h" -#include "arrow/python/numpy_to_arrow.h" -#include "arrow/python/python_to_arrow.h" -#include "arrow/python/serialize.h" diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc index ba9a8436908bf..9001a57798cba 100644 --- a/cpp/src/arrow/record_batch.cc +++ b/cpp/src/arrow/record_batch.cc @@ -390,6 +390,15 @@ Result> RecordBatchReader::Make( return std::make_shared(std::move(batches), schema); } +Result> RecordBatchReader::MakeFromIterator( + Iterator> batches, std::shared_ptr schema) { + if (schema == nullptr) { + return Status::Invalid("Schema cannot be nullptr"); + } + + return std::make_shared(std::move(batches), schema); +} + RecordBatchReader::~RecordBatchReader() { ARROW_WARN_NOT_OK(this->Close(), "Implicitly called RecordBatchReader::Close failed"); } diff --git a/cpp/src/arrow/record_batch.h b/cpp/src/arrow/record_batch.h index b80c36d78c192..8bc70322560b6 100644 --- a/cpp/src/arrow/record_batch.h +++ b/cpp/src/arrow/record_batch.h @@ -25,6 +25,7 @@ #include "arrow/result.h" #include "arrow/status.h" #include "arrow/type_fwd.h" +#include "arrow/util/iterator.h" #include "arrow/util/macros.h" #include "arrow/util/visibility.h" @@ -327,6 +328,13 @@ class ARROW_EXPORT RecordBatchReader { /// element if not provided. static Result> Make( RecordBatchVector batches, std::shared_ptr schema = NULLPTR); + + /// \brief Create a RecordBatchReader from an Iterator of RecordBatch. + /// + /// \param[in] batches an iterator of RecordBatch to read from. + /// \param[in] schema schema that each record batch in iterator will conform to. 
+ static Result> MakeFromIterator( + Iterator> batches, std::shared_ptr schema); }; } // namespace arrow diff --git a/cpp/src/arrow/scalar.cc b/cpp/src/arrow/scalar.cc index 21e1cdedc2a5e..5ed92f0947649 100644 --- a/cpp/src/arrow/scalar.cc +++ b/cpp/src/arrow/scalar.cc @@ -1041,6 +1041,20 @@ Status CastImpl(const StructScalar& from, StringScalar* to) { return Status::OK(); } +// list based types (list, large list and map (fixed sized list too)) to string +Status CastImpl(const BaseListScalar& from, StringScalar* to) { + std::stringstream ss; + ss << from.type->ToString() << "["; + for (int64_t i = 0; i < from.value->length(); i++) { + if (i > 0) ss << ", "; + ARROW_ASSIGN_OR_RAISE(auto value, from.value->GetScalar(i)); + ss << value->ToString(); + } + ss << ']'; + to->value = Buffer::FromString(ss.str()); + return Status::OK(); +} + Status CastImpl(const UnionScalar& from, StringScalar* to) { const auto& union_ty = checked_cast(*from.type); std::stringstream ss; diff --git a/cpp/src/arrow/scalar_test.cc b/cpp/src/arrow/scalar_test.cc index 265ee3e94eb38..bf001fc6fd90e 100644 --- a/cpp/src/arrow/scalar_test.cc +++ b/cpp/src/arrow/scalar_test.cc @@ -1049,11 +1049,15 @@ class TestListScalar : public ::testing::Test { ASSERT_OK(scalar.ValidateFull()); ASSERT_TRUE(scalar.is_valid); AssertTypeEqual(scalar.type, type_); + // list[1, 2, null] + ASSERT_THAT(scalar.ToString(), ::testing::AllOf(::testing::HasSubstr("item: int16"), + ::testing::EndsWith("[1, 2, null]"))); auto null_scalar = CheckMakeNullScalar(type_); ASSERT_OK(null_scalar->ValidateFull()); ASSERT_FALSE(null_scalar->is_valid); AssertTypeEqual(null_scalar->type, type_); + ASSERT_EQ(null_scalar->ToString(), "null"); } void TestValidateErrors() { diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index f3ac2d62d8268..50c02b1a6d46a 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -2123,31 +2123,37 @@ std::string ToString(TimeUnit::type unit); // Helpers to get instances of data types based on general categories +/// \brief Signed integer types ARROW_EXPORT const std::vector>& SignedIntTypes(); +/// \brief Unsigned integer types ARROW_EXPORT const std::vector>& UnsignedIntTypes(); +/// \brief Signed and unsigned integer types ARROW_EXPORT const std::vector>& IntTypes(); +/// \brief Floating point types ARROW_EXPORT const std::vector>& FloatingPointTypes(); -// Number types without boolean +/// \brief Number types without boolean - integer and floating point types ARROW_EXPORT const std::vector>& NumericTypes(); -// Binary and string-like types (except fixed-size binary) +/// \brief Binary and string-like types (except fixed-size binary) ARROW_EXPORT const std::vector>& BaseBinaryTypes(); +/// \brief Binary and large-binary types ARROW_EXPORT const std::vector>& BinaryTypes(); +/// \brief String and large-string types ARROW_EXPORT const std::vector>& StringTypes(); -// Temporal types including time and timestamps for each unit +/// \brief Temporal types including date, time and timestamps for each unit ARROW_EXPORT const std::vector>& TemporalTypes(); -// Interval types +/// \brief Interval types ARROW_EXPORT const std::vector>& IntervalTypes(); -// Integer, floating point, base binary, and temporal +/// \brief Numeric, base binary, date, boolean and null types ARROW_EXPORT const std::vector>& PrimitiveTypes(); diff --git a/cpp/src/arrow/type_test.cc b/cpp/src/arrow/type_test.cc index c7ac5f6c7f22e..2d1a0078edadd 100644 --- a/cpp/src/arrow/type_test.cc +++ b/cpp/src/arrow/type_test.cc @@ -1821,4 +1821,47 @@ 
TEST(TypesTest, TestDecimalEquals) { AssertTypeNotEqual(t5, t10); } +#define TEST_PREDICATE(all_types, type_predicate) \ + for (auto type : all_types) { \ + ASSERT_EQ(type_predicate(type->id()), type_predicate(*type)); \ + } + +TEST(TypesTest, TestMembership) { + std::vector> all_types; + for (auto type : NumericTypes()) { + all_types.push_back(type); + } + for (auto type : TemporalTypes()) { + all_types.push_back(type); + } + for (auto type : IntervalTypes()) { + all_types.push_back(type); + } + for (auto type : PrimitiveTypes()) { + all_types.push_back(type); + } + TEST_PREDICATE(all_types, is_integer); + TEST_PREDICATE(all_types, is_signed_integer); + TEST_PREDICATE(all_types, is_unsigned_integer); + TEST_PREDICATE(all_types, is_floating); + TEST_PREDICATE(all_types, is_numeric); + TEST_PREDICATE(all_types, is_decimal); + TEST_PREDICATE(all_types, is_primitive); + TEST_PREDICATE(all_types, is_base_binary_like); + TEST_PREDICATE(all_types, is_binary_like); + TEST_PREDICATE(all_types, is_large_binary_like); + TEST_PREDICATE(all_types, is_binary); + TEST_PREDICATE(all_types, is_string); + TEST_PREDICATE(all_types, is_temporal); + TEST_PREDICATE(all_types, is_interval); + TEST_PREDICATE(all_types, is_dictionary); + TEST_PREDICATE(all_types, is_fixed_size_binary); + TEST_PREDICATE(all_types, is_fixed_width); + TEST_PREDICATE(all_types, is_list_like); + TEST_PREDICATE(all_types, is_nested); + TEST_PREDICATE(all_types, is_union); +} + +#undef TEST_PREDICATE + } // namespace arrow diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index 221b35ce57323..66da3cadcb592 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -846,6 +846,11 @@ using enable_if_physical_floating_point = /// \addtogroup runtime-type-predicates /// @{ + +/// \brief Check for an integer type (signed or unsigned) +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is an integer type one static inline bool is_integer(Type::type type_id) { switch (type_id) { case Type::UINT8: @@ -863,6 +868,10 @@ static inline bool is_integer(Type::type type_id) { return false; } +/// \brief Check for a signed integer type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a signed integer type one static inline bool is_signed_integer(Type::type type_id) { switch (type_id) { case Type::INT8: @@ -876,6 +885,10 @@ static inline bool is_signed_integer(Type::type type_id) { return false; } +/// \brief Check for an unsigned integer type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is an unsigned integer type one static inline bool is_unsigned_integer(Type::type type_id) { switch (type_id) { case Type::UINT8: @@ -889,6 +902,10 @@ static inline bool is_unsigned_integer(Type::type type_id) { return false; } +/// \brief Check for a floating point type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a floating point type one static inline bool is_floating(Type::type type_id) { switch (type_id) { case Type::HALF_FLOAT: @@ -901,6 +918,36 @@ static inline bool is_floating(Type::type type_id) { return false; } +/// \brief Check for a numeric type +/// +/// This predicate doesn't match decimals (see `is_decimal`). 
+/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a numeric type one +static inline bool is_numeric(Type::type type_id) { + switch (type_id) { + case Type::UINT8: + case Type::INT8: + case Type::UINT16: + case Type::INT16: + case Type::UINT32: + case Type::INT32: + case Type::UINT64: + case Type::INT64: + case Type::HALF_FLOAT: + case Type::FLOAT: + case Type::DOUBLE: + return true; + default: + break; + } + return false; +} + +/// \brief Check for a decimal type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a decimal type one static inline bool is_decimal(Type::type type_id) { switch (type_id) { case Type::DECIMAL128: @@ -912,6 +959,12 @@ static inline bool is_decimal(Type::type type_id) { return false; } +/// \brief Check for a primitive type +/// +/// This predicate doesn't match null, decimals and binary-like types. +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a primitive type one static inline bool is_primitive(Type::type type_id) { switch (type_id) { case Type::BOOL: @@ -942,6 +995,13 @@ static inline bool is_primitive(Type::type type_id) { return false; } +/// \brief Check for a base-binary-like type +/// +/// This predicate doesn't match fixed-size binary types and will otherwise +/// match all binary- and string-like types regardless of offset width. +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a base-binary-like type one static inline bool is_base_binary_like(Type::type type_id) { switch (type_id) { case Type::BINARY: @@ -955,6 +1015,10 @@ static inline bool is_base_binary_like(Type::type type_id) { return false; } +/// \brief Check for a binary-like type (i.e. with 32-bit offsets) +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a binary-like type one static inline bool is_binary_like(Type::type type_id) { switch (type_id) { case Type::BINARY: @@ -966,6 +1030,10 @@ static inline bool is_binary_like(Type::type type_id) { return false; } +/// \brief Check for a large-binary-like type (i.e. 
with 64-bit offsets) +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a large-binary-like type one static inline bool is_large_binary_like(Type::type type_id) { switch (type_id) { case Type::LARGE_BINARY: @@ -977,10 +1045,83 @@ static inline bool is_large_binary_like(Type::type type_id) { return false; } +/// \brief Check for a binary (non-string) type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a binary type one +static inline bool is_binary(Type::type type_id) { + switch (type_id) { + case Type::BINARY: + case Type::LARGE_BINARY: + return true; + default: + break; + } + return false; +} + +/// \brief Check for a string type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a string type one +static inline bool is_string(Type::type type_id) { + switch (type_id) { + case Type::STRING: + case Type::LARGE_STRING: + return true; + default: + break; + } + return false; +} + +/// \brief Check for a temporal type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a temporal type one +static inline bool is_temporal(Type::type type_id) { + switch (type_id) { + case Type::DATE32: + case Type::DATE64: + case Type::TIME32: + case Type::TIME64: + case Type::TIMESTAMP: + return true; + default: + break; + } + return false; +} + +/// \brief Check for an interval type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is an interval type one +static inline bool is_interval(Type::type type_id) { + switch (type_id) { + case Type::INTERVAL_MONTHS: + case Type::INTERVAL_DAY_TIME: + case Type::INTERVAL_MONTH_DAY_NANO: + return true; + default: + break; + } + return false; +} + +/// \brief Check for a dictionary type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a dictionary type one static inline bool is_dictionary(Type::type type_id) { return type_id == Type::DICTIONARY; } +/// \brief Check for a fixed-size-binary type +/// +/// This predicate also matches decimals. 
+/// \param[in] type_id the type-id to check +/// \return whether type-id is a fixed-size-binary type one static inline bool is_fixed_size_binary(Type::type type_id) { switch (type_id) { case Type::DECIMAL128: @@ -993,10 +1134,73 @@ static inline bool is_fixed_size_binary(Type::type type_id) { return false; } +/// \brief Check for a fixed-width type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a fixed-width type one static inline bool is_fixed_width(Type::type type_id) { return is_primitive(type_id) || is_dictionary(type_id) || is_fixed_size_binary(type_id); } +/// \brief Check for a list-like type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a list-like type one +static inline bool is_list_like(Type::type type_id) { + switch (type_id) { + case Type::LIST: + case Type::LARGE_LIST: + case Type::FIXED_SIZE_LIST: + case Type::MAP: + return true; + default: + break; + } + return false; +} + +/// \brief Check for a nested type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a nested type one +static inline bool is_nested(Type::type type_id) { + switch (type_id) { + case Type::LIST: + case Type::LARGE_LIST: + case Type::FIXED_SIZE_LIST: + case Type::MAP: + case Type::STRUCT: + case Type::SPARSE_UNION: + case Type::DENSE_UNION: + return true; + default: + break; + } + return false; +} + +/// \brief Check for a union type +/// +/// \param[in] type_id the type-id to check +/// \return whether type-id is a union type one +static inline bool is_union(Type::type type_id) { + switch (type_id) { + case Type::SPARSE_UNION: + case Type::DENSE_UNION: + return true; + default: + break; + } + return false; +} + +/// \brief Return the values bit width of a type +/// +/// \param[in] type_id the type-id to check +/// \return the values bit width, or 0 if the type does not have fixed-width values +/// +/// For Type::FIXED_SIZE_BINARY, you will instead need to inspect the concrete +/// DataType to get this information. 
static inline int bit_width(Type::type type_id) { switch (type_id) { case Type::BOOL: @@ -1045,46 +1249,10 @@ static inline int bit_width(Type::type type_id) { return 0; } -static inline bool is_list_like(Type::type type_id) { - switch (type_id) { - case Type::LIST: - case Type::LARGE_LIST: - case Type::FIXED_SIZE_LIST: - case Type::MAP: - return true; - default: - break; - } - return false; -} - -static inline bool is_nested(Type::type type_id) { - switch (type_id) { - case Type::LIST: - case Type::LARGE_LIST: - case Type::FIXED_SIZE_LIST: - case Type::MAP: - case Type::STRUCT: - case Type::SPARSE_UNION: - case Type::DENSE_UNION: - return true; - default: - break; - } - return false; -} - -static inline bool is_union(Type::type type_id) { - switch (type_id) { - case Type::SPARSE_UNION: - case Type::DENSE_UNION: - return true; - default: - break; - } - return false; -} - +/// \brief Return the offsets bit width of a type +/// +/// \param[in] type_id the type-id to check +/// \return the offsets bit width, or 0 if the type does not have offsets static inline int offset_bit_width(Type::type type_id) { switch (type_id) { case Type::STRING: @@ -1103,6 +1271,182 @@ static inline int offset_bit_width(Type::type type_id) { return 0; } +/// \brief Check for an integer type (signed or unsigned) +/// +/// \param[in] type the type to check +/// \return whether type is an integer type +/// +/// Convenience for checking using the type's id +static inline bool is_integer(const DataType& type) { return is_integer(type.id()); } + +/// \brief Check for a signed integer type +/// +/// \param[in] type the type to check +/// \return whether type is a signed integer type +/// +/// Convenience for checking using the type's id +static inline bool is_signed_integer(const DataType& type) { + return is_signed_integer(type.id()); +} + +/// \brief Check for an unsigned integer type +/// +/// \param[in] type the type to check +/// \return whether type is an unsigned integer type +/// +/// Convenience for checking using the type's id +static inline bool is_unsigned_integer(const DataType& type) { + return is_unsigned_integer(type.id()); +} + +/// \brief Check for a floating point type +/// +/// \param[in] type the type to check +/// \return whether type is a floating point type +/// +/// Convenience for checking using the type's id +static inline bool is_floating(const DataType& type) { return is_floating(type.id()); } + +/// \brief Check for a numeric type (number except boolean type) +/// +/// \param[in] type the type to check +/// \return whether type is a numeric type +/// +/// Convenience for checking using the type's id +static inline bool is_numeric(const DataType& type) { return is_numeric(type.id()); } + +/// \brief Check for a decimal type +/// +/// \param[in] type the type to check +/// \return whether type is a decimal type +/// +/// Convenience for checking using the type's id +static inline bool is_decimal(const DataType& type) { return is_decimal(type.id()); } + +/// \brief Check for a primitive type +/// +/// \param[in] type the type to check +/// \return whether type is a primitive type +/// +/// Convenience for checking using the type's id +static inline bool is_primitive(const DataType& type) { return is_primitive(type.id()); } + +/// \brief Check for a binary or string-like type (except fixed-size binary) +/// +/// \param[in] type the type to check +/// \return whether type is a binary or string-like type +/// +/// Convenience for checking using the type's id +static inline bool 
is_base_binary_like(const DataType& type) { + return is_base_binary_like(type.id()); +} + +/// \brief Check for a binary-like type +/// +/// \param[in] type the type to check +/// \return whether type is a binary-like type +/// +/// Convenience for checking using the type's id +static inline bool is_binary_like(const DataType& type) { + return is_binary_like(type.id()); +} + +/// \brief Check for a large-binary-like type +/// +/// \param[in] type the type to check +/// \return whether type is a large-binary-like type +/// +/// Convenience for checking using the type's id +static inline bool is_large_binary_like(const DataType& type) { + return is_large_binary_like(type.id()); +} + +/// \brief Check for a binary type +/// +/// \param[in] type the type to check +/// \return whether type is a binary type +/// +/// Convenience for checking using the type's id +static inline bool is_binary(const DataType& type) { return is_binary(type.id()); } + +/// \brief Check for a string type +/// +/// \param[in] type the type to check +/// \return whether type is a string type +/// +/// Convenience for checking using the type's id +static inline bool is_string(const DataType& type) { return is_string(type.id()); } + +/// \brief Check for a temporal type, including time and timestamps for each unit +/// +/// \param[in] type the type to check +/// \return whether type is a temporal type +/// +/// Convenience for checking using the type's id +static inline bool is_temporal(const DataType& type) { return is_temporal(type.id()); } + +/// \brief Check for an interval type +/// +/// \param[in] type the type to check +/// \return whether type is a interval type +/// +/// Convenience for checking using the type's id +static inline bool is_interval(const DataType& type) { return is_interval(type.id()); } + +/// \brief Check for a dictionary type +/// +/// \param[in] type the type to check +/// \return whether type is a dictionary type +/// +/// Convenience for checking using the type's id +static inline bool is_dictionary(const DataType& type) { + return is_dictionary(type.id()); +} + +/// \brief Check for a fixed-size-binary type +/// +/// \param[in] type the type to check +/// \return whether type is a fixed-size-binary type +/// +/// Convenience for checking using the type's id +static inline bool is_fixed_size_binary(const DataType& type) { + return is_fixed_size_binary(type.id()); +} + +/// \brief Check for a fixed-width type +/// +/// \param[in] type the type to check +/// \return whether type is a fixed-width type +/// +/// Convenience for checking using the type's id +static inline bool is_fixed_width(const DataType& type) { + return is_fixed_width(type.id()); +} + +/// \brief Check for a list-like type +/// +/// \param[in] type the type to check +/// \return whether type is a list-like type +/// +/// Convenience for checking using the type's id +static inline bool is_list_like(const DataType& type) { return is_list_like(type.id()); } + +/// \brief Check for a nested type +/// +/// \param[in] type the type to check +/// \return whether type is a nested type +/// +/// Convenience for checking using the type's id +static inline bool is_nested(const DataType& type) { return is_nested(type.id()); } + +/// \brief Check for a union type +/// +/// \param[in] type the type to check +/// \return whether type is a union type +/// +/// Convenience for checking using the type's id +static inline bool is_union(const DataType& type) { return is_union(type.id()); } + /// @} } // namespace arrow diff --git 
a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc index 8e95d014628a3..3bcb4ceea6303 100644 --- a/cpp/src/arrow/util/bit_util_benchmark.cc +++ b/cpp/src/arrow/util/bit_util_benchmark.cc @@ -150,9 +150,7 @@ static void BenchmarkAndImpl(benchmark::State& state, DoAnd&& do_and) { for (auto _ : state) { do_and({bitmap_1, bitmap_2}, &bitmap_3); - auto total = - internal::CountSetBits(bitmap_3.data(), bitmap_3.offset(), bitmap_3.length()); - benchmark::DoNotOptimize(total); + benchmark::ClobberMemory(); } state.SetBytesProcessed(state.iterations() * nbytes); } diff --git a/cpp/src/arrow/util/config.h.cmake b/cpp/src/arrow/util/config.h.cmake index c4a2a40be2715..c987a0cae367e 100644 --- a/cpp/src/arrow/util/config.h.cmake +++ b/cpp/src/arrow/util/config.h.cmake @@ -50,6 +50,7 @@ #cmakedefine ARROW_GCS #cmakedefine ARROW_S3 #cmakedefine ARROW_USE_NATIVE_INT128 +#cmakedefine ARROW_WITH_MUSL #cmakedefine ARROW_WITH_OPENTELEMETRY #cmakedefine ARROW_WITH_UCX diff --git a/cpp/src/arrow/util/cpu_info.cc b/cpp/src/arrow/util/cpu_info.cc index 3ba8db216e78b..fbe55aec0c157 100644 --- a/cpp/src/arrow/util/cpu_info.cc +++ b/cpp/src/arrow/util/cpu_info.cc @@ -111,7 +111,8 @@ void OsRetrieveCacheSize(std::array* cache_sizes) { if (RelationCache == buffer_position->Relationship) { PCACHE_DESCRIPTOR cache = &buffer_position->Cache; if (cache->Level >= 1 && cache->Level <= kCacheLevels) { - (*cache_sizes)[cache->Level - 1] += cache->Size; + const int64_t current = (*cache_sizes)[cache->Level - 1]; + (*cache_sizes)[cache->Level - 1] = std::max(current, cache->Size); } } offset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); @@ -295,35 +296,31 @@ void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor, //------------------------------ LINUX ------------------------------// // Get cache size, return 0 on error int64_t LinuxGetCacheSize(int level) { - const struct { - int sysconf_name; - const char* sysfs_path; - } kCacheSizeEntries[] = { - { - _SC_LEVEL1_DCACHE_SIZE, - "/sys/devices/system/cpu/cpu0/cache/index0/size", // l1d (index1 is l1i) - }, - { - _SC_LEVEL2_CACHE_SIZE, - "/sys/devices/system/cpu/cpu0/cache/index2/size", // l2 - }, - { - _SC_LEVEL3_CACHE_SIZE, - "/sys/devices/system/cpu/cpu0/cache/index3/size", // l3 - }, + // get cache size by sysconf() +#ifdef _SC_LEVEL1_DCACHE_SIZE + const int kCacheSizeConf[] = { + _SC_LEVEL1_DCACHE_SIZE, + _SC_LEVEL2_CACHE_SIZE, + _SC_LEVEL3_CACHE_SIZE, }; - static_assert(sizeof(kCacheSizeEntries) / sizeof(kCacheSizeEntries[0]) == kCacheLevels, - ""); + static_assert(sizeof(kCacheSizeConf) / sizeof(kCacheSizeConf[0]) == kCacheLevels, ""); - // get cache size by sysconf() errno = 0; - const int64_t cache_size = sysconf(kCacheSizeEntries[level].sysconf_name); + const int64_t cache_size = sysconf(kCacheSizeConf[level]); if (errno == 0 && cache_size > 0) { return cache_size; } +#endif + + // get cache size from sysfs if sysconf() fails or not supported + const char* kCacheSizeSysfs[] = { + "/sys/devices/system/cpu/cpu0/cache/index0/size", // l1d (index1 is l1i) + "/sys/devices/system/cpu/cpu0/cache/index2/size", // l2 + "/sys/devices/system/cpu/cpu0/cache/index3/size", // l3 + }; + static_assert(sizeof(kCacheSizeSysfs) / sizeof(kCacheSizeSysfs[0]) == kCacheLevels, ""); - // get cache size from sysfs if sysconf() fails (it does happen on Arm) - std::ifstream cacheinfo(kCacheSizeEntries[level].sysfs_path, std::ios::in); + std::ifstream cacheinfo(kCacheSizeSysfs[level], std::ios::in); if (!cacheinfo) { return 0; } diff --git 
a/cpp/src/arrow/util/string.cc b/cpp/src/arrow/util/string.cc index 3a1586005528f..00ab8e64c4757 100644 --- a/cpp/src/arrow/util/string.cc +++ b/cpp/src/arrow/util/string.cc @@ -92,11 +92,16 @@ Status ParseHexValue(const char* data, uint8_t* out) { namespace internal { -std::vector SplitString(util::string_view v, char delimiter) { +std::vector SplitString(util::string_view v, char delimiter, + int64_t limit) { std::vector parts; size_t start = 0, end; while (true) { - end = v.find(delimiter, start); + if (limit > 0 && static_cast(limit - 1) <= parts.size()) { + end = std::string::npos; + } else { + end = v.find(delimiter, start); + } parts.push_back(v.substr(start, end - start)); if (end == std::string::npos) { break; diff --git a/cpp/src/arrow/util/string.h b/cpp/src/arrow/util/string.h index d2c8ac38eeca6..b2baa0ebedaaf 100644 --- a/cpp/src/arrow/util/string.h +++ b/cpp/src/arrow/util/string.h @@ -45,7 +45,8 @@ namespace internal { /// \brief Split a string with a delimiter ARROW_EXPORT -std::vector SplitString(util::string_view v, char delim); +std::vector SplitString(util::string_view v, char delim, + int64_t limit = 0); /// \brief Join strings with a delimiter ARROW_EXPORT diff --git a/cpp/src/arrow/util/string_test.cc b/cpp/src/arrow/util/string_test.cc index 057d885fcdb75..2aa6fccbd9a0f 100644 --- a/cpp/src/arrow/util/string_test.cc +++ b/cpp/src/arrow/util/string_test.cc @@ -140,5 +140,31 @@ TEST(SplitString, OnlyDemiliter) { EXPECT_EQ(parts[1], ""); } +TEST(SplitString, Limit) { + std::string input = "a:b:c"; + auto parts = SplitString(input, ':', 2); + ASSERT_EQ(parts.size(), 2); + EXPECT_EQ(parts[0], "a"); + EXPECT_EQ(parts[1], "b:c"); +} + +TEST(SplitString, LimitOver) { + std::string input = "a:b:c"; + auto parts = SplitString(input, ':', 4); + ASSERT_EQ(parts.size(), 3); + EXPECT_EQ(parts[0], "a"); + EXPECT_EQ(parts[1], "b"); + EXPECT_EQ(parts[2], "c"); +} + +TEST(SplitString, LimitZero) { + std::string input = "a:b:c"; + auto parts = SplitString(input, ':', 0); + ASSERT_EQ(parts.size(), 3); + EXPECT_EQ(parts[0], "a"); + EXPECT_EQ(parts[1], "b"); + EXPECT_EQ(parts[2], "c"); +} + } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/value_parsing.h b/cpp/src/arrow/util/value_parsing.h index fbbbcf10f00f5..5193f0af750c6 100644 --- a/cpp/src/arrow/util/value_parsing.h +++ b/cpp/src/arrow/util/value_parsing.h @@ -31,6 +31,7 @@ #include "arrow/type.h" #include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/config.h" #include "arrow/util/macros.h" #include "arrow/util/time.h" #include "arrow/util/visibility.h" @@ -770,7 +771,7 @@ static inline bool ParseTimestampISO8601(const char* s, size_t length, return true; } -#ifdef _WIN32 +#if defined(_WIN32) || defined(ARROW_WITH_MUSL) static constexpr bool kStrptimeSupportsZone = false; #else static constexpr bool kStrptimeSupportsZone = true; diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index 40fb656bd40b6..cf0f4f9b917c3 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -754,14 +754,13 @@ GANDIVA_EXPORT gdv_timestamp from_utc_timezone_timestamp(gdv_int64 context, gdv_timestamp time_miliseconds, const char* timezone, gdv_int32 length) { - using arrow_vendored::date::make_zoned; using arrow_vendored::date::sys_time; + using arrow_vendored::date::zoned_time; using std::chrono::milliseconds; - sys_time tp{milliseconds{time_miliseconds}}; - const auto utc_tz = make_zoned(std::string("Etc/UTC"), 
tp); + const sys_time tp{milliseconds{time_miliseconds}}; try { - const auto local_tz = make_zoned(std::string(timezone, length), utc_tz); + const zoned_time local_tz{std::string(timezone, length), tp}; gdv_timestamp offset = local_tz.get_time_zone()->get_info(tp).offset.count() * 1000; return time_miliseconds + static_cast(offset); } catch (...) { diff --git a/cpp/src/gandiva/precompiled/CMakeLists.txt b/cpp/src/gandiva/precompiled/CMakeLists.txt index 650b80f6bd9a4..fefabf883657a 100644 --- a/cpp/src/gandiva/precompiled/CMakeLists.txt +++ b/cpp/src/gandiva/precompiled/CMakeLists.txt @@ -31,10 +31,14 @@ set(PRECOMPILED_SRCS ../../arrow/util/basic_decimal.cc) if(MSVC) - # clang pretends to be a particular version of MSVC. 191[0-9] is - # Visual Studio 2017, and the standard library uses C++14 features, - # so we have to use that -std version to get the IR compilation to work - if(MSVC_VERSION MATCHES "^191[0-9]$") + # clang pretends to be a particular version of MSVC. Thestandard + # library uses C++14 features, so we have to use that -std version + # to get the IR compilation to work. + # See https://cmake.org/cmake/help/latest/variable/MSVC_VERSION.html + # for MSVC_VERSION and Visual Studio version. + if(MSVC_VERSION LESS 1930) + set(FMS_COMPATIBILITY 19.20) + elseif(MSVC_VERSION LESS 1920) set(FMS_COMPATIBILITY 19.10) else() message(FATAL_ERROR "Unsupported MSVC_VERSION=${MSVC_VERSION}") diff --git a/cpp/src/generated/Expression_generated.h b/cpp/src/generated/Expression_generated.h deleted file mode 100644 index 730b001497db8..0000000000000 --- a/cpp/src/generated/Expression_generated.h +++ /dev/null @@ -1,1870 +0,0 @@ -// automatically generated by the FlatBuffers compiler, do not modify - - -#ifndef FLATBUFFERS_GENERATED_EXPRESSION_ORG_APACHE_ARROW_COMPUTEIR_FLATBUF_H_ -#define FLATBUFFERS_GENERATED_EXPRESSION_ORG_APACHE_ARROW_COMPUTEIR_FLATBUF_H_ - -#include "flatbuffers/flatbuffers.h" - -#include "Schema_generated.h" -#include "Literal_generated.h" - -namespace org { -namespace apache { -namespace arrow { -namespace computeir { -namespace flatbuf { - -struct MapKey; -struct MapKeyBuilder; - -struct StructField; -struct StructFieldBuilder; - -struct ArraySubscript; -struct ArraySubscriptBuilder; - -struct ArraySlice; -struct ArraySliceBuilder; - -struct FieldIndex; -struct FieldIndexBuilder; - -struct FieldRef; -struct FieldRefBuilder; - -struct Call; -struct CallBuilder; - -struct CaseFragment; -struct CaseFragmentBuilder; - -struct ConditionalCase; -struct ConditionalCaseBuilder; - -struct SimpleCase; -struct SimpleCaseBuilder; - -struct SortKey; -struct SortKeyBuilder; - -struct Unbounded; -struct UnboundedBuilder; - -struct Preceding; -struct PrecedingBuilder; - -struct Following; -struct FollowingBuilder; - -struct CurrentRow; -struct CurrentRowBuilder; - -struct WindowCall; -struct WindowCallBuilder; - -struct Cast; -struct CastBuilder; - -struct Expression; -struct ExpressionBuilder; - -/// A union of possible dereference operations -enum class Deref : uint8_t { - NONE = 0, - /// Access a value for a given map key - MapKey = 1, - /// Access the value at a struct field - StructField = 2, - /// Access the element at a given index in an array - ArraySubscript = 3, - /// Access a range of elements in an array - ArraySlice = 4, - /// Access a field of a relation - FieldIndex = 5, - MIN = NONE, - MAX = FieldIndex -}; - -inline const Deref (&EnumValuesDeref())[6] { - static const Deref values[] = { - Deref::NONE, - Deref::MapKey, - Deref::StructField, - Deref::ArraySubscript, - 
Deref::ArraySlice, - Deref::FieldIndex - }; - return values; -} - -inline const char * const *EnumNamesDeref() { - static const char * const names[7] = { - "NONE", - "MapKey", - "StructField", - "ArraySubscript", - "ArraySlice", - "FieldIndex", - nullptr - }; - return names; -} - -inline const char *EnumNameDeref(Deref e) { - if (flatbuffers::IsOutRange(e, Deref::NONE, Deref::FieldIndex)) return ""; - const size_t index = static_cast(e); - return EnumNamesDeref()[index]; -} - -template struct DerefTraits { - static const Deref enum_value = Deref::NONE; -}; - -template<> struct DerefTraits { - static const Deref enum_value = Deref::MapKey; -}; - -template<> struct DerefTraits { - static const Deref enum_value = Deref::StructField; -}; - -template<> struct DerefTraits { - static const Deref enum_value = Deref::ArraySubscript; -}; - -template<> struct DerefTraits { - static const Deref enum_value = Deref::ArraySlice; -}; - -template<> struct DerefTraits { - static const Deref enum_value = Deref::FieldIndex; -}; - -bool VerifyDeref(flatbuffers::Verifier &verifier, const void *obj, Deref type); -bool VerifyDerefVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types); - -/// Whether lesser values should precede greater or vice versa, -/// also whether nulls should preced or follow values -enum class Ordering : uint8_t { - ASCENDING_THEN_NULLS = 0, - DESCENDING_THEN_NULLS = 1, - NULLS_THEN_ASCENDING = 2, - NULLS_THEN_DESCENDING = 3, - MIN = ASCENDING_THEN_NULLS, - MAX = NULLS_THEN_DESCENDING -}; - -inline const Ordering (&EnumValuesOrdering())[4] { - static const Ordering values[] = { - Ordering::ASCENDING_THEN_NULLS, - Ordering::DESCENDING_THEN_NULLS, - Ordering::NULLS_THEN_ASCENDING, - Ordering::NULLS_THEN_DESCENDING - }; - return values; -} - -inline const char * const *EnumNamesOrdering() { - static const char * const names[5] = { - "ASCENDING_THEN_NULLS", - "DESCENDING_THEN_NULLS", - "NULLS_THEN_ASCENDING", - "NULLS_THEN_DESCENDING", - nullptr - }; - return names; -} - -inline const char *EnumNameOrdering(Ordering e) { - if (flatbuffers::IsOutRange(e, Ordering::ASCENDING_THEN_NULLS, Ordering::NULLS_THEN_DESCENDING)) return ""; - const size_t index = static_cast(e); - return EnumNamesOrdering()[index]; -} - -/// A concrete bound, which can be an expression or unbounded -enum class ConcreteBoundImpl : uint8_t { - NONE = 0, - Expression = 1, - Unbounded = 2, - MIN = NONE, - MAX = Unbounded -}; - -inline const ConcreteBoundImpl (&EnumValuesConcreteBoundImpl())[3] { - static const ConcreteBoundImpl values[] = { - ConcreteBoundImpl::NONE, - ConcreteBoundImpl::Expression, - ConcreteBoundImpl::Unbounded - }; - return values; -} - -inline const char * const *EnumNamesConcreteBoundImpl() { - static const char * const names[4] = { - "NONE", - "Expression", - "Unbounded", - nullptr - }; - return names; -} - -inline const char *EnumNameConcreteBoundImpl(ConcreteBoundImpl e) { - if (flatbuffers::IsOutRange(e, ConcreteBoundImpl::NONE, ConcreteBoundImpl::Unbounded)) return ""; - const size_t index = static_cast(e); - return EnumNamesConcreteBoundImpl()[index]; -} - -template struct ConcreteBoundImplTraits { - static const ConcreteBoundImpl enum_value = ConcreteBoundImpl::NONE; -}; - -template<> struct ConcreteBoundImplTraits { - static const ConcreteBoundImpl enum_value = ConcreteBoundImpl::Expression; -}; - -template<> struct ConcreteBoundImplTraits { - static const ConcreteBoundImpl enum_value = ConcreteBoundImpl::Unbounded; -}; - -bool 
VerifyConcreteBoundImpl(flatbuffers::Verifier &verifier, const void *obj, ConcreteBoundImpl type); -bool VerifyConcreteBoundImplVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types); - -enum class Bound : uint8_t { - NONE = 0, - Preceding = 1, - Following = 2, - CurrentRow = 3, - MIN = NONE, - MAX = CurrentRow -}; - -inline const Bound (&EnumValuesBound())[4] { - static const Bound values[] = { - Bound::NONE, - Bound::Preceding, - Bound::Following, - Bound::CurrentRow - }; - return values; -} - -inline const char * const *EnumNamesBound() { - static const char * const names[5] = { - "NONE", - "Preceding", - "Following", - "CurrentRow", - nullptr - }; - return names; -} - -inline const char *EnumNameBound(Bound e) { - if (flatbuffers::IsOutRange(e, Bound::NONE, Bound::CurrentRow)) return ""; - const size_t index = static_cast(e); - return EnumNamesBound()[index]; -} - -template struct BoundTraits { - static const Bound enum_value = Bound::NONE; -}; - -template<> struct BoundTraits { - static const Bound enum_value = Bound::Preceding; -}; - -template<> struct BoundTraits { - static const Bound enum_value = Bound::Following; -}; - -template<> struct BoundTraits { - static const Bound enum_value = Bound::CurrentRow; -}; - -bool VerifyBound(flatbuffers::Verifier &verifier, const void *obj, Bound type); -bool VerifyBoundVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types); - -/// The kind of window function to be executed -enum class Frame : uint8_t { - Rows = 0, - Range = 1, - MIN = Rows, - MAX = Range -}; - -inline const Frame (&EnumValuesFrame())[2] { - static const Frame values[] = { - Frame::Rows, - Frame::Range - }; - return values; -} - -inline const char * const *EnumNamesFrame() { - static const char * const names[3] = { - "Rows", - "Range", - nullptr - }; - return names; -} - -inline const char *EnumNameFrame(Frame e) { - if (flatbuffers::IsOutRange(e, Frame::Rows, Frame::Range)) return ""; - const size_t index = static_cast(e); - return EnumNamesFrame()[index]; -} - -/// Various expression types -/// -/// WindowCall is a separate variant -/// due to special options for each that don't apply to generic -/// function calls. 
Again this is done to make it easier -/// for consumers to deal with the structure of the operation -enum class ExpressionImpl : uint8_t { - NONE = 0, - Literal = 1, - FieldRef = 2, - Call = 3, - ConditionalCase = 4, - SimpleCase = 5, - WindowCall = 6, - Cast = 7, - MIN = NONE, - MAX = Cast -}; - -inline const ExpressionImpl (&EnumValuesExpressionImpl())[8] { - static const ExpressionImpl values[] = { - ExpressionImpl::NONE, - ExpressionImpl::Literal, - ExpressionImpl::FieldRef, - ExpressionImpl::Call, - ExpressionImpl::ConditionalCase, - ExpressionImpl::SimpleCase, - ExpressionImpl::WindowCall, - ExpressionImpl::Cast - }; - return values; -} - -inline const char * const *EnumNamesExpressionImpl() { - static const char * const names[9] = { - "NONE", - "Literal", - "FieldRef", - "Call", - "ConditionalCase", - "SimpleCase", - "WindowCall", - "Cast", - nullptr - }; - return names; -} - -inline const char *EnumNameExpressionImpl(ExpressionImpl e) { - if (flatbuffers::IsOutRange(e, ExpressionImpl::NONE, ExpressionImpl::Cast)) return ""; - const size_t index = static_cast(e); - return EnumNamesExpressionImpl()[index]; -} - -template struct ExpressionImplTraits { - static const ExpressionImpl enum_value = ExpressionImpl::NONE; -}; - -template<> struct ExpressionImplTraits { - static const ExpressionImpl enum_value = ExpressionImpl::Literal; -}; - -template<> struct ExpressionImplTraits { - static const ExpressionImpl enum_value = ExpressionImpl::FieldRef; -}; - -template<> struct ExpressionImplTraits { - static const ExpressionImpl enum_value = ExpressionImpl::Call; -}; - -template<> struct ExpressionImplTraits { - static const ExpressionImpl enum_value = ExpressionImpl::ConditionalCase; -}; - -template<> struct ExpressionImplTraits { - static const ExpressionImpl enum_value = ExpressionImpl::SimpleCase; -}; - -template<> struct ExpressionImplTraits { - static const ExpressionImpl enum_value = ExpressionImpl::WindowCall; -}; - -template<> struct ExpressionImplTraits { - static const ExpressionImpl enum_value = ExpressionImpl::Cast; -}; - -bool VerifyExpressionImpl(flatbuffers::Verifier &verifier, const void *obj, ExpressionImpl type); -bool VerifyExpressionImplVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types); - -/// Access a value for a given map key -struct MapKey FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef MapKeyBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_KEY = 4 - }; - /// Any expression can be a map key. 
- const org::apache::arrow::computeir::flatbuf::Expression *key() const { - return GetPointer(VT_KEY); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_KEY) && - verifier.VerifyTable(key()) && - verifier.EndTable(); - } -}; - -struct MapKeyBuilder { - typedef MapKey Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_key(flatbuffers::Offset key) { - fbb_.AddOffset(MapKey::VT_KEY, key); - } - explicit MapKeyBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - MapKeyBuilder &operator=(const MapKeyBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, MapKey::VT_KEY); - return o; - } -}; - -inline flatbuffers::Offset CreateMapKey( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset key = 0) { - MapKeyBuilder builder_(_fbb); - builder_.add_key(key); - return builder_.Finish(); -} - -/// Struct field access -struct StructField FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef StructFieldBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_POSITION = 4 - }; - /// The position of the field in the struct schema - uint32_t position() const { - return GetField(VT_POSITION, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_POSITION) && - verifier.EndTable(); - } -}; - -struct StructFieldBuilder { - typedef StructField Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_position(uint32_t position) { - fbb_.AddElement(StructField::VT_POSITION, position, 0); - } - explicit StructFieldBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - StructFieldBuilder &operator=(const StructFieldBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateStructField( - flatbuffers::FlatBufferBuilder &_fbb, - uint32_t position = 0) { - StructFieldBuilder builder_(_fbb); - builder_.add_position(position); - return builder_.Finish(); -} - -/// Zero-based array index -struct ArraySubscript FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef ArraySubscriptBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_POSITION = 4 - }; - uint32_t position() const { - return GetField(VT_POSITION, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_POSITION) && - verifier.EndTable(); - } -}; - -struct ArraySubscriptBuilder { - typedef ArraySubscript Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_position(uint32_t position) { - fbb_.AddElement(ArraySubscript::VT_POSITION, position, 0); - } - explicit ArraySubscriptBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - ArraySubscriptBuilder &operator=(const ArraySubscriptBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateArraySubscript( - flatbuffers::FlatBufferBuilder &_fbb, - uint32_t position = 0) { - ArraySubscriptBuilder builder_(_fbb); - 
builder_.add_position(position); - return builder_.Finish(); -} - -/// Zero-based range of elements in an array -struct ArraySlice FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef ArraySliceBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_START_INCLUSIVE = 4, - VT_END_EXCLUSIVE = 6 - }; - /// The start of an array slice, inclusive - uint32_t start_inclusive() const { - return GetField(VT_START_INCLUSIVE, 0); - } - /// The end of an array slice, exclusive - uint32_t end_exclusive() const { - return GetField(VT_END_EXCLUSIVE, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_START_INCLUSIVE) && - VerifyField(verifier, VT_END_EXCLUSIVE) && - verifier.EndTable(); - } -}; - -struct ArraySliceBuilder { - typedef ArraySlice Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_start_inclusive(uint32_t start_inclusive) { - fbb_.AddElement(ArraySlice::VT_START_INCLUSIVE, start_inclusive, 0); - } - void add_end_exclusive(uint32_t end_exclusive) { - fbb_.AddElement(ArraySlice::VT_END_EXCLUSIVE, end_exclusive, 0); - } - explicit ArraySliceBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - ArraySliceBuilder &operator=(const ArraySliceBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateArraySlice( - flatbuffers::FlatBufferBuilder &_fbb, - uint32_t start_inclusive = 0, - uint32_t end_exclusive = 0) { - ArraySliceBuilder builder_(_fbb); - builder_.add_end_exclusive(end_exclusive); - builder_.add_start_inclusive(start_inclusive); - return builder_.Finish(); -} - -/// Field name in a relation, in ordinal position of the relation's schema. 
-struct FieldIndex FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef FieldIndexBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_POSITION = 4 - }; - uint32_t position() const { - return GetField(VT_POSITION, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_POSITION) && - verifier.EndTable(); - } -}; - -struct FieldIndexBuilder { - typedef FieldIndex Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_position(uint32_t position) { - fbb_.AddElement(FieldIndex::VT_POSITION, position, 0); - } - explicit FieldIndexBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - FieldIndexBuilder &operator=(const FieldIndexBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateFieldIndex( - flatbuffers::FlatBufferBuilder &_fbb, - uint32_t position = 0) { - FieldIndexBuilder builder_(_fbb); - builder_.add_position(position); - return builder_.Finish(); -} - -/// Access the data of a field -struct FieldRef FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef FieldRefBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_REF_TYPE = 4, - VT_REF = 6, - VT_RELATION_INDEX = 8 - }; - org::apache::arrow::computeir::flatbuf::Deref ref_type() const { - return static_cast(GetField(VT_REF_TYPE, 0)); - } - const void *ref() const { - return GetPointer(VT_REF); - } - template const T *ref_as() const; - const org::apache::arrow::computeir::flatbuf::MapKey *ref_as_MapKey() const { - return ref_type() == org::apache::arrow::computeir::flatbuf::Deref::MapKey ? static_cast(ref()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::StructField *ref_as_StructField() const { - return ref_type() == org::apache::arrow::computeir::flatbuf::Deref::StructField ? static_cast(ref()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::ArraySubscript *ref_as_ArraySubscript() const { - return ref_type() == org::apache::arrow::computeir::flatbuf::Deref::ArraySubscript ? static_cast(ref()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::ArraySlice *ref_as_ArraySlice() const { - return ref_type() == org::apache::arrow::computeir::flatbuf::Deref::ArraySlice ? static_cast(ref()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::FieldIndex *ref_as_FieldIndex() const { - return ref_type() == org::apache::arrow::computeir::flatbuf::Deref::FieldIndex ? static_cast(ref()) : nullptr; - } - /// For Expressions which might reference fields in multiple Relations, - /// this index may be provided to indicate which Relation's fields - /// `ref` points into. For example in the case of a join, - /// 0 refers to the left relation and 1 to the right relation. 
- int32_t relation_index() const { - return GetField(VT_RELATION_INDEX, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_REF_TYPE) && - VerifyOffsetRequired(verifier, VT_REF) && - VerifyDeref(verifier, ref(), ref_type()) && - VerifyField(verifier, VT_RELATION_INDEX) && - verifier.EndTable(); - } -}; - -template<> inline const org::apache::arrow::computeir::flatbuf::MapKey *FieldRef::ref_as() const { - return ref_as_MapKey(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::StructField *FieldRef::ref_as() const { - return ref_as_StructField(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::ArraySubscript *FieldRef::ref_as() const { - return ref_as_ArraySubscript(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::ArraySlice *FieldRef::ref_as() const { - return ref_as_ArraySlice(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::FieldIndex *FieldRef::ref_as() const { - return ref_as_FieldIndex(); -} - -struct FieldRefBuilder { - typedef FieldRef Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_ref_type(org::apache::arrow::computeir::flatbuf::Deref ref_type) { - fbb_.AddElement(FieldRef::VT_REF_TYPE, static_cast(ref_type), 0); - } - void add_ref(flatbuffers::Offset ref) { - fbb_.AddOffset(FieldRef::VT_REF, ref); - } - void add_relation_index(int32_t relation_index) { - fbb_.AddElement(FieldRef::VT_RELATION_INDEX, relation_index, 0); - } - explicit FieldRefBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - FieldRefBuilder &operator=(const FieldRefBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, FieldRef::VT_REF); - return o; - } -}; - -inline flatbuffers::Offset CreateFieldRef( - flatbuffers::FlatBufferBuilder &_fbb, - org::apache::arrow::computeir::flatbuf::Deref ref_type = org::apache::arrow::computeir::flatbuf::Deref::NONE, - flatbuffers::Offset ref = 0, - int32_t relation_index = 0) { - FieldRefBuilder builder_(_fbb); - builder_.add_relation_index(relation_index); - builder_.add_ref(ref); - builder_.add_ref_type(ref_type); - return builder_.Finish(); -} - -/// A function call expression -struct Call FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef CallBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_NAME = 4, - VT_ARGUMENTS = 6, - VT_ORDERINGS = 8 - }; - /// The function to call - const flatbuffers::String *name() const { - return GetPointer(VT_NAME); - } - /// The arguments passed to `name`. - const flatbuffers::Vector> *arguments() const { - return GetPointer> *>(VT_ARGUMENTS); - } - /// Possible ordering of input. 
These are useful - /// in aggregates where ordering in meaningful such as - /// string concatenation - const flatbuffers::Vector> *orderings() const { - return GetPointer> *>(VT_ORDERINGS); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_NAME) && - verifier.VerifyString(name()) && - VerifyOffsetRequired(verifier, VT_ARGUMENTS) && - verifier.VerifyVector(arguments()) && - verifier.VerifyVectorOfTables(arguments()) && - VerifyOffset(verifier, VT_ORDERINGS) && - verifier.VerifyVector(orderings()) && - verifier.VerifyVectorOfTables(orderings()) && - verifier.EndTable(); - } -}; - -struct CallBuilder { - typedef Call Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_name(flatbuffers::Offset name) { - fbb_.AddOffset(Call::VT_NAME, name); - } - void add_arguments(flatbuffers::Offset>> arguments) { - fbb_.AddOffset(Call::VT_ARGUMENTS, arguments); - } - void add_orderings(flatbuffers::Offset>> orderings) { - fbb_.AddOffset(Call::VT_ORDERINGS, orderings); - } - explicit CallBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - CallBuilder &operator=(const CallBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, Call::VT_NAME); - fbb_.Required(o, Call::VT_ARGUMENTS); - return o; - } -}; - -inline flatbuffers::Offset CreateCall( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset name = 0, - flatbuffers::Offset>> arguments = 0, - flatbuffers::Offset>> orderings = 0) { - CallBuilder builder_(_fbb); - builder_.add_orderings(orderings); - builder_.add_arguments(arguments); - builder_.add_name(name); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreateCallDirect( - flatbuffers::FlatBufferBuilder &_fbb, - const char *name = nullptr, - const std::vector> *arguments = nullptr, - const std::vector> *orderings = nullptr) { - auto name__ = name ? _fbb.CreateString(name) : 0; - auto arguments__ = arguments ? _fbb.CreateVector>(*arguments) : 0; - auto orderings__ = orderings ? _fbb.CreateVector>(*orderings) : 0; - return org::apache::arrow::computeir::flatbuf::CreateCall( - _fbb, - name__, - arguments__, - orderings__); -} - -/// A single WHEN x THEN y fragment. 
-struct CaseFragment FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef CaseFragmentBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_MATCH = 4, - VT_RESULT = 6 - }; - const org::apache::arrow::computeir::flatbuf::Expression *match() const { - return GetPointer(VT_MATCH); - } - const org::apache::arrow::computeir::flatbuf::Expression *result() const { - return GetPointer(VT_RESULT); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_MATCH) && - verifier.VerifyTable(match()) && - VerifyOffsetRequired(verifier, VT_RESULT) && - verifier.VerifyTable(result()) && - verifier.EndTable(); - } -}; - -struct CaseFragmentBuilder { - typedef CaseFragment Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_match(flatbuffers::Offset match) { - fbb_.AddOffset(CaseFragment::VT_MATCH, match); - } - void add_result(flatbuffers::Offset result) { - fbb_.AddOffset(CaseFragment::VT_RESULT, result); - } - explicit CaseFragmentBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - CaseFragmentBuilder &operator=(const CaseFragmentBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, CaseFragment::VT_MATCH); - fbb_.Required(o, CaseFragment::VT_RESULT); - return o; - } -}; - -inline flatbuffers::Offset CreateCaseFragment( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset match = 0, - flatbuffers::Offset result = 0) { - CaseFragmentBuilder builder_(_fbb); - builder_.add_result(result); - builder_.add_match(match); - return builder_.Finish(); -} - -/// Conditional case statement expression -struct ConditionalCase FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef ConditionalCaseBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_CONDITIONS = 4, - VT_ELSE_ = 6 - }; - /// List of conditions to evaluate - const flatbuffers::Vector> *conditions() const { - return GetPointer> *>(VT_CONDITIONS); - } - /// The default value if no cases match. This is typically NULL in SQL - /// implementations. - /// - /// Defaulting to NULL is a frontend choice, so producers must specify NULL - /// if that's their desired behavior. 
- const org::apache::arrow::computeir::flatbuf::Expression *else_() const { - return GetPointer(VT_ELSE_); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_CONDITIONS) && - verifier.VerifyVector(conditions()) && - verifier.VerifyVectorOfTables(conditions()) && - VerifyOffsetRequired(verifier, VT_ELSE_) && - verifier.VerifyTable(else_()) && - verifier.EndTable(); - } -}; - -struct ConditionalCaseBuilder { - typedef ConditionalCase Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_conditions(flatbuffers::Offset>> conditions) { - fbb_.AddOffset(ConditionalCase::VT_CONDITIONS, conditions); - } - void add_else_(flatbuffers::Offset else_) { - fbb_.AddOffset(ConditionalCase::VT_ELSE_, else_); - } - explicit ConditionalCaseBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - ConditionalCaseBuilder &operator=(const ConditionalCaseBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, ConditionalCase::VT_CONDITIONS); - fbb_.Required(o, ConditionalCase::VT_ELSE_); - return o; - } -}; - -inline flatbuffers::Offset CreateConditionalCase( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset>> conditions = 0, - flatbuffers::Offset else_ = 0) { - ConditionalCaseBuilder builder_(_fbb); - builder_.add_else_(else_); - builder_.add_conditions(conditions); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreateConditionalCaseDirect( - flatbuffers::FlatBufferBuilder &_fbb, - const std::vector> *conditions = nullptr, - flatbuffers::Offset else_ = 0) { - auto conditions__ = conditions ? _fbb.CreateVector>(*conditions) : 0; - return org::apache::arrow::computeir::flatbuf::CreateConditionalCase( - _fbb, - conditions__, - else_); -} - -/// Switch-style case expression -struct SimpleCase FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef SimpleCaseBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_EXPRESSION = 4, - VT_MATCHES = 6, - VT_ELSE_ = 8 - }; - /// The expression whose value will be matched - const org::apache::arrow::computeir::flatbuf::Expression *expression() const { - return GetPointer(VT_EXPRESSION); - } - /// Matches for `expression` - const flatbuffers::Vector> *matches() const { - return GetPointer> *>(VT_MATCHES); - } - /// The default value if no cases match - const org::apache::arrow::computeir::flatbuf::Expression *else_() const { - return GetPointer(VT_ELSE_); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_EXPRESSION) && - verifier.VerifyTable(expression()) && - VerifyOffsetRequired(verifier, VT_MATCHES) && - verifier.VerifyVector(matches()) && - verifier.VerifyVectorOfTables(matches()) && - VerifyOffsetRequired(verifier, VT_ELSE_) && - verifier.VerifyTable(else_()) && - verifier.EndTable(); - } -}; - -struct SimpleCaseBuilder { - typedef SimpleCase Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_expression(flatbuffers::Offset expression) { - fbb_.AddOffset(SimpleCase::VT_EXPRESSION, expression); - } - void add_matches(flatbuffers::Offset>> matches) { - fbb_.AddOffset(SimpleCase::VT_MATCHES, matches); - } - void add_else_(flatbuffers::Offset else_) { - fbb_.AddOffset(SimpleCase::VT_ELSE_, else_); - } - explicit 
SimpleCaseBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - SimpleCaseBuilder &operator=(const SimpleCaseBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, SimpleCase::VT_EXPRESSION); - fbb_.Required(o, SimpleCase::VT_MATCHES); - fbb_.Required(o, SimpleCase::VT_ELSE_); - return o; - } -}; - -inline flatbuffers::Offset CreateSimpleCase( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset expression = 0, - flatbuffers::Offset>> matches = 0, - flatbuffers::Offset else_ = 0) { - SimpleCaseBuilder builder_(_fbb); - builder_.add_else_(else_); - builder_.add_matches(matches); - builder_.add_expression(expression); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreateSimpleCaseDirect( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset expression = 0, - const std::vector> *matches = nullptr, - flatbuffers::Offset else_ = 0) { - auto matches__ = matches ? _fbb.CreateVector>(*matches) : 0; - return org::apache::arrow::computeir::flatbuf::CreateSimpleCase( - _fbb, - expression, - matches__, - else_); -} - -/// An expression with an order -struct SortKey FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef SortKeyBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_EXPRESSION = 4, - VT_ORDERING = 6 - }; - const org::apache::arrow::computeir::flatbuf::Expression *expression() const { - return GetPointer(VT_EXPRESSION); - } - org::apache::arrow::computeir::flatbuf::Ordering ordering() const { - return static_cast(GetField(VT_ORDERING, 0)); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_EXPRESSION) && - verifier.VerifyTable(expression()) && - VerifyField(verifier, VT_ORDERING) && - verifier.EndTable(); - } -}; - -struct SortKeyBuilder { - typedef SortKey Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_expression(flatbuffers::Offset expression) { - fbb_.AddOffset(SortKey::VT_EXPRESSION, expression); - } - void add_ordering(org::apache::arrow::computeir::flatbuf::Ordering ordering) { - fbb_.AddElement(SortKey::VT_ORDERING, static_cast(ordering), 0); - } - explicit SortKeyBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - SortKeyBuilder &operator=(const SortKeyBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, SortKey::VT_EXPRESSION); - return o; - } -}; - -inline flatbuffers::Offset CreateSortKey( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset expression = 0, - org::apache::arrow::computeir::flatbuf::Ordering ordering = org::apache::arrow::computeir::flatbuf::Ordering::ASCENDING_THEN_NULLS) { - SortKeyBuilder builder_(_fbb); - builder_.add_expression(expression); - builder_.add_ordering(ordering); - return builder_.Finish(); -} - -/// An unbounded window bound -struct Unbounded FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef UnboundedBuilder Builder; - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - verifier.EndTable(); - } -}; - -struct UnboundedBuilder { - typedef Unbounded Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - explicit UnboundedBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ 
= fbb_.StartTable(); - } - UnboundedBuilder &operator=(const UnboundedBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateUnbounded( - flatbuffers::FlatBufferBuilder &_fbb) { - UnboundedBuilder builder_(_fbb); - return builder_.Finish(); -} - -/// Boundary is preceding rows, determined by the contained expression -struct Preceding FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef PrecedingBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_IMPL_TYPE = 4, - VT_IMPL = 6 - }; - org::apache::arrow::computeir::flatbuf::ConcreteBoundImpl impl_type() const { - return static_cast(GetField(VT_IMPL_TYPE, 0)); - } - const void *impl() const { - return GetPointer(VT_IMPL); - } - template const T *impl_as() const; - const org::apache::arrow::computeir::flatbuf::Expression *impl_as_Expression() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::ConcreteBoundImpl::Expression ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::Unbounded *impl_as_Unbounded() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::ConcreteBoundImpl::Unbounded ? static_cast(impl()) : nullptr; - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_IMPL_TYPE) && - VerifyOffsetRequired(verifier, VT_IMPL) && - VerifyConcreteBoundImpl(verifier, impl(), impl_type()) && - verifier.EndTable(); - } -}; - -template<> inline const org::apache::arrow::computeir::flatbuf::Expression *Preceding::impl_as() const { - return impl_as_Expression(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::Unbounded *Preceding::impl_as() const { - return impl_as_Unbounded(); -} - -struct PrecedingBuilder { - typedef Preceding Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_impl_type(org::apache::arrow::computeir::flatbuf::ConcreteBoundImpl impl_type) { - fbb_.AddElement(Preceding::VT_IMPL_TYPE, static_cast(impl_type), 0); - } - void add_impl(flatbuffers::Offset impl) { - fbb_.AddOffset(Preceding::VT_IMPL, impl); - } - explicit PrecedingBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - PrecedingBuilder &operator=(const PrecedingBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, Preceding::VT_IMPL); - return o; - } -}; - -inline flatbuffers::Offset CreatePreceding( - flatbuffers::FlatBufferBuilder &_fbb, - org::apache::arrow::computeir::flatbuf::ConcreteBoundImpl impl_type = org::apache::arrow::computeir::flatbuf::ConcreteBoundImpl::NONE, - flatbuffers::Offset impl = 0) { - PrecedingBuilder builder_(_fbb); - builder_.add_impl(impl); - builder_.add_impl_type(impl_type); - return builder_.Finish(); -} - -/// Boundary is following rows, determined by the contained expression -struct Following FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef FollowingBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_IMPL_TYPE = 4, - VT_IMPL = 6 - }; - org::apache::arrow::computeir::flatbuf::ConcreteBoundImpl impl_type() const { - return static_cast(GetField(VT_IMPL_TYPE, 0)); - } - const void *impl() const { - return GetPointer(VT_IMPL); - } - template const T *impl_as() const; - const 
org::apache::arrow::computeir::flatbuf::Expression *impl_as_Expression() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::ConcreteBoundImpl::Expression ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::Unbounded *impl_as_Unbounded() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::ConcreteBoundImpl::Unbounded ? static_cast(impl()) : nullptr; - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_IMPL_TYPE) && - VerifyOffsetRequired(verifier, VT_IMPL) && - VerifyConcreteBoundImpl(verifier, impl(), impl_type()) && - verifier.EndTable(); - } -}; - -template<> inline const org::apache::arrow::computeir::flatbuf::Expression *Following::impl_as() const { - return impl_as_Expression(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::Unbounded *Following::impl_as() const { - return impl_as_Unbounded(); -} - -struct FollowingBuilder { - typedef Following Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_impl_type(org::apache::arrow::computeir::flatbuf::ConcreteBoundImpl impl_type) { - fbb_.AddElement(Following::VT_IMPL_TYPE, static_cast(impl_type), 0); - } - void add_impl(flatbuffers::Offset impl) { - fbb_.AddOffset(Following::VT_IMPL, impl); - } - explicit FollowingBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - FollowingBuilder &operator=(const FollowingBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, Following::VT_IMPL); - return o; - } -}; - -inline flatbuffers::Offset CreateFollowing( - flatbuffers::FlatBufferBuilder &_fbb, - org::apache::arrow::computeir::flatbuf::ConcreteBoundImpl impl_type = org::apache::arrow::computeir::flatbuf::ConcreteBoundImpl::NONE, - flatbuffers::Offset impl = 0) { - FollowingBuilder builder_(_fbb); - builder_.add_impl(impl); - builder_.add_impl_type(impl_type); - return builder_.Finish(); -} - -/// Boundary is the current row -struct CurrentRow FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef CurrentRowBuilder Builder; - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - verifier.EndTable(); - } -}; - -struct CurrentRowBuilder { - typedef CurrentRow Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - explicit CurrentRowBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - CurrentRowBuilder &operator=(const CurrentRowBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateCurrentRow( - flatbuffers::FlatBufferBuilder &_fbb) { - CurrentRowBuilder builder_(_fbb); - return builder_.Finish(); -} - -/// An expression representing a window function call. 
-struct WindowCall FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef WindowCallBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_EXPRESSION = 4, - VT_KIND = 6, - VT_PARTITIONS = 8, - VT_ORDERINGS = 10, - VT_LOWER_BOUND_TYPE = 12, - VT_LOWER_BOUND = 14, - VT_UPPER_BOUND_TYPE = 16, - VT_UPPER_BOUND = 18 - }; - /// The expression to operate over - const org::apache::arrow::computeir::flatbuf::Expression *expression() const { - return GetPointer(VT_EXPRESSION); - } - /// The kind of window frame - org::apache::arrow::computeir::flatbuf::Frame kind() const { - return static_cast(GetField(VT_KIND, 0)); - } - /// Partition keys - const flatbuffers::Vector> *partitions() const { - return GetPointer> *>(VT_PARTITIONS); - } - /// Sort keys - const flatbuffers::Vector> *orderings() const { - return GetPointer> *>(VT_ORDERINGS); - } - org::apache::arrow::computeir::flatbuf::Bound lower_bound_type() const { - return static_cast(GetField(VT_LOWER_BOUND_TYPE, 0)); - } - /// Lower window bound - const void *lower_bound() const { - return GetPointer(VT_LOWER_BOUND); - } - template const T *lower_bound_as() const; - const org::apache::arrow::computeir::flatbuf::Preceding *lower_bound_as_Preceding() const { - return lower_bound_type() == org::apache::arrow::computeir::flatbuf::Bound::Preceding ? static_cast(lower_bound()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::Following *lower_bound_as_Following() const { - return lower_bound_type() == org::apache::arrow::computeir::flatbuf::Bound::Following ? static_cast(lower_bound()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::CurrentRow *lower_bound_as_CurrentRow() const { - return lower_bound_type() == org::apache::arrow::computeir::flatbuf::Bound::CurrentRow ? static_cast(lower_bound()) : nullptr; - } - org::apache::arrow::computeir::flatbuf::Bound upper_bound_type() const { - return static_cast(GetField(VT_UPPER_BOUND_TYPE, 0)); - } - /// Upper window bound - const void *upper_bound() const { - return GetPointer(VT_UPPER_BOUND); - } - template const T *upper_bound_as() const; - const org::apache::arrow::computeir::flatbuf::Preceding *upper_bound_as_Preceding() const { - return upper_bound_type() == org::apache::arrow::computeir::flatbuf::Bound::Preceding ? static_cast(upper_bound()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::Following *upper_bound_as_Following() const { - return upper_bound_type() == org::apache::arrow::computeir::flatbuf::Bound::Following ? static_cast(upper_bound()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::CurrentRow *upper_bound_as_CurrentRow() const { - return upper_bound_type() == org::apache::arrow::computeir::flatbuf::Bound::CurrentRow ? 
static_cast(upper_bound()) : nullptr; - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_EXPRESSION) && - verifier.VerifyTable(expression()) && - VerifyField(verifier, VT_KIND) && - VerifyOffsetRequired(verifier, VT_PARTITIONS) && - verifier.VerifyVector(partitions()) && - verifier.VerifyVectorOfTables(partitions()) && - VerifyOffsetRequired(verifier, VT_ORDERINGS) && - verifier.VerifyVector(orderings()) && - verifier.VerifyVectorOfTables(orderings()) && - VerifyField(verifier, VT_LOWER_BOUND_TYPE) && - VerifyOffsetRequired(verifier, VT_LOWER_BOUND) && - VerifyBound(verifier, lower_bound(), lower_bound_type()) && - VerifyField(verifier, VT_UPPER_BOUND_TYPE) && - VerifyOffsetRequired(verifier, VT_UPPER_BOUND) && - VerifyBound(verifier, upper_bound(), upper_bound_type()) && - verifier.EndTable(); - } -}; - -template<> inline const org::apache::arrow::computeir::flatbuf::Preceding *WindowCall::lower_bound_as() const { - return lower_bound_as_Preceding(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::Following *WindowCall::lower_bound_as() const { - return lower_bound_as_Following(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::CurrentRow *WindowCall::lower_bound_as() const { - return lower_bound_as_CurrentRow(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::Preceding *WindowCall::upper_bound_as() const { - return upper_bound_as_Preceding(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::Following *WindowCall::upper_bound_as() const { - return upper_bound_as_Following(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::CurrentRow *WindowCall::upper_bound_as() const { - return upper_bound_as_CurrentRow(); -} - -struct WindowCallBuilder { - typedef WindowCall Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_expression(flatbuffers::Offset expression) { - fbb_.AddOffset(WindowCall::VT_EXPRESSION, expression); - } - void add_kind(org::apache::arrow::computeir::flatbuf::Frame kind) { - fbb_.AddElement(WindowCall::VT_KIND, static_cast(kind), 0); - } - void add_partitions(flatbuffers::Offset>> partitions) { - fbb_.AddOffset(WindowCall::VT_PARTITIONS, partitions); - } - void add_orderings(flatbuffers::Offset>> orderings) { - fbb_.AddOffset(WindowCall::VT_ORDERINGS, orderings); - } - void add_lower_bound_type(org::apache::arrow::computeir::flatbuf::Bound lower_bound_type) { - fbb_.AddElement(WindowCall::VT_LOWER_BOUND_TYPE, static_cast(lower_bound_type), 0); - } - void add_lower_bound(flatbuffers::Offset lower_bound) { - fbb_.AddOffset(WindowCall::VT_LOWER_BOUND, lower_bound); - } - void add_upper_bound_type(org::apache::arrow::computeir::flatbuf::Bound upper_bound_type) { - fbb_.AddElement(WindowCall::VT_UPPER_BOUND_TYPE, static_cast(upper_bound_type), 0); - } - void add_upper_bound(flatbuffers::Offset upper_bound) { - fbb_.AddOffset(WindowCall::VT_UPPER_BOUND, upper_bound); - } - explicit WindowCallBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - WindowCallBuilder &operator=(const WindowCallBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, WindowCall::VT_EXPRESSION); - fbb_.Required(o, WindowCall::VT_PARTITIONS); - fbb_.Required(o, WindowCall::VT_ORDERINGS); - fbb_.Required(o, WindowCall::VT_LOWER_BOUND); - fbb_.Required(o, 
WindowCall::VT_UPPER_BOUND); - return o; - } -}; - -inline flatbuffers::Offset CreateWindowCall( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset expression = 0, - org::apache::arrow::computeir::flatbuf::Frame kind = org::apache::arrow::computeir::flatbuf::Frame::Rows, - flatbuffers::Offset>> partitions = 0, - flatbuffers::Offset>> orderings = 0, - org::apache::arrow::computeir::flatbuf::Bound lower_bound_type = org::apache::arrow::computeir::flatbuf::Bound::NONE, - flatbuffers::Offset lower_bound = 0, - org::apache::arrow::computeir::flatbuf::Bound upper_bound_type = org::apache::arrow::computeir::flatbuf::Bound::NONE, - flatbuffers::Offset upper_bound = 0) { - WindowCallBuilder builder_(_fbb); - builder_.add_upper_bound(upper_bound); - builder_.add_lower_bound(lower_bound); - builder_.add_orderings(orderings); - builder_.add_partitions(partitions); - builder_.add_expression(expression); - builder_.add_upper_bound_type(upper_bound_type); - builder_.add_lower_bound_type(lower_bound_type); - builder_.add_kind(kind); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreateWindowCallDirect( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset expression = 0, - org::apache::arrow::computeir::flatbuf::Frame kind = org::apache::arrow::computeir::flatbuf::Frame::Rows, - const std::vector> *partitions = nullptr, - const std::vector> *orderings = nullptr, - org::apache::arrow::computeir::flatbuf::Bound lower_bound_type = org::apache::arrow::computeir::flatbuf::Bound::NONE, - flatbuffers::Offset lower_bound = 0, - org::apache::arrow::computeir::flatbuf::Bound upper_bound_type = org::apache::arrow::computeir::flatbuf::Bound::NONE, - flatbuffers::Offset upper_bound = 0) { - auto partitions__ = partitions ? _fbb.CreateVector>(*partitions) : 0; - auto orderings__ = orderings ? _fbb.CreateVector>(*orderings) : 0; - return org::apache::arrow::computeir::flatbuf::CreateWindowCall( - _fbb, - expression, - kind, - partitions__, - orderings__, - lower_bound_type, - lower_bound, - upper_bound_type, - upper_bound); -} - -/// A cast expression -struct Cast FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef CastBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_OPERAND = 4, - VT_TO = 6 - }; - /// The expression to cast - const org::apache::arrow::computeir::flatbuf::Expression *operand() const { - return GetPointer(VT_OPERAND); - } - /// The type to cast to. This value is a `Field` to allow complete representation - /// of arrow types. - /// - /// `Type` is unable to completely represent complex types like lists and - /// maps. 
- const org::apache::arrow::flatbuf::Field *to() const { - return GetPointer(VT_TO); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_OPERAND) && - verifier.VerifyTable(operand()) && - VerifyOffsetRequired(verifier, VT_TO) && - verifier.VerifyTable(to()) && - verifier.EndTable(); - } -}; - -struct CastBuilder { - typedef Cast Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_operand(flatbuffers::Offset operand) { - fbb_.AddOffset(Cast::VT_OPERAND, operand); - } - void add_to(flatbuffers::Offset to) { - fbb_.AddOffset(Cast::VT_TO, to); - } - explicit CastBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - CastBuilder &operator=(const CastBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, Cast::VT_OPERAND); - fbb_.Required(o, Cast::VT_TO); - return o; - } -}; - -inline flatbuffers::Offset CreateCast( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset operand = 0, - flatbuffers::Offset to = 0) { - CastBuilder builder_(_fbb); - builder_.add_to(to); - builder_.add_operand(operand); - return builder_.Finish(); -} - -/// Expression types -/// -/// Expressions have a concrete `impl` value, which is a specific operation. -/// -/// This is a workaround for flatbuffers' lack of support for direct use of -/// union types. -struct Expression FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef ExpressionBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_IMPL_TYPE = 4, - VT_IMPL = 6 - }; - org::apache::arrow::computeir::flatbuf::ExpressionImpl impl_type() const { - return static_cast(GetField(VT_IMPL_TYPE, 0)); - } - const void *impl() const { - return GetPointer(VT_IMPL); - } - template const T *impl_as() const; - const org::apache::arrow::computeir::flatbuf::Literal *impl_as_Literal() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::ExpressionImpl::Literal ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::FieldRef *impl_as_FieldRef() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::ExpressionImpl::FieldRef ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::Call *impl_as_Call() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::ExpressionImpl::Call ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::ConditionalCase *impl_as_ConditionalCase() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::ExpressionImpl::ConditionalCase ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::SimpleCase *impl_as_SimpleCase() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::ExpressionImpl::SimpleCase ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::WindowCall *impl_as_WindowCall() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::ExpressionImpl::WindowCall ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::Cast *impl_as_Cast() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::ExpressionImpl::Cast ? 
static_cast(impl()) : nullptr; - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_IMPL_TYPE) && - VerifyOffsetRequired(verifier, VT_IMPL) && - VerifyExpressionImpl(verifier, impl(), impl_type()) && - verifier.EndTable(); - } -}; - -template<> inline const org::apache::arrow::computeir::flatbuf::Literal *Expression::impl_as() const { - return impl_as_Literal(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::FieldRef *Expression::impl_as() const { - return impl_as_FieldRef(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::Call *Expression::impl_as() const { - return impl_as_Call(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::ConditionalCase *Expression::impl_as() const { - return impl_as_ConditionalCase(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::SimpleCase *Expression::impl_as() const { - return impl_as_SimpleCase(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::WindowCall *Expression::impl_as() const { - return impl_as_WindowCall(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::Cast *Expression::impl_as() const { - return impl_as_Cast(); -} - -struct ExpressionBuilder { - typedef Expression Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_impl_type(org::apache::arrow::computeir::flatbuf::ExpressionImpl impl_type) { - fbb_.AddElement(Expression::VT_IMPL_TYPE, static_cast(impl_type), 0); - } - void add_impl(flatbuffers::Offset impl) { - fbb_.AddOffset(Expression::VT_IMPL, impl); - } - explicit ExpressionBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - ExpressionBuilder &operator=(const ExpressionBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, Expression::VT_IMPL); - return o; - } -}; - -inline flatbuffers::Offset CreateExpression( - flatbuffers::FlatBufferBuilder &_fbb, - org::apache::arrow::computeir::flatbuf::ExpressionImpl impl_type = org::apache::arrow::computeir::flatbuf::ExpressionImpl::NONE, - flatbuffers::Offset impl = 0) { - ExpressionBuilder builder_(_fbb); - builder_.add_impl(impl); - builder_.add_impl_type(impl_type); - return builder_.Finish(); -} - -inline bool VerifyDeref(flatbuffers::Verifier &verifier, const void *obj, Deref type) { - switch (type) { - case Deref::NONE: { - return true; - } - case Deref::MapKey: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case Deref::StructField: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case Deref::ArraySubscript: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case Deref::ArraySlice: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case Deref::FieldIndex: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - default: return true; - } -} - -inline bool VerifyDerefVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types) { - if (!values || !types) return !values && !types; - if (values->size() != types->size()) return false; - for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) { - if (!VerifyDeref( - verifier, values->Get(i), types->GetEnum(i))) { - return false; - } - } - return true; -} - -inline bool 
VerifyConcreteBoundImpl(flatbuffers::Verifier &verifier, const void *obj, ConcreteBoundImpl type) { - switch (type) { - case ConcreteBoundImpl::NONE: { - return true; - } - case ConcreteBoundImpl::Expression: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case ConcreteBoundImpl::Unbounded: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - default: return true; - } -} - -inline bool VerifyConcreteBoundImplVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types) { - if (!values || !types) return !values && !types; - if (values->size() != types->size()) return false; - for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) { - if (!VerifyConcreteBoundImpl( - verifier, values->Get(i), types->GetEnum(i))) { - return false; - } - } - return true; -} - -inline bool VerifyBound(flatbuffers::Verifier &verifier, const void *obj, Bound type) { - switch (type) { - case Bound::NONE: { - return true; - } - case Bound::Preceding: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case Bound::Following: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case Bound::CurrentRow: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - default: return true; - } -} - -inline bool VerifyBoundVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types) { - if (!values || !types) return !values && !types; - if (values->size() != types->size()) return false; - for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) { - if (!VerifyBound( - verifier, values->Get(i), types->GetEnum(i))) { - return false; - } - } - return true; -} - -inline bool VerifyExpressionImpl(flatbuffers::Verifier &verifier, const void *obj, ExpressionImpl type) { - switch (type) { - case ExpressionImpl::NONE: { - return true; - } - case ExpressionImpl::Literal: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case ExpressionImpl::FieldRef: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case ExpressionImpl::Call: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case ExpressionImpl::ConditionalCase: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case ExpressionImpl::SimpleCase: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case ExpressionImpl::WindowCall: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case ExpressionImpl::Cast: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - default: return true; - } -} - -inline bool VerifyExpressionImplVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types) { - if (!values || !types) return !values && !types; - if (values->size() != types->size()) return false; - for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) { - if (!VerifyExpressionImpl( - verifier, values->Get(i), types->GetEnum(i))) { - return false; - } - } - return true; -} - -inline const org::apache::arrow::computeir::flatbuf::Expression *GetExpression(const void *buf) { - return flatbuffers::GetRoot(buf); -} - -inline const org::apache::arrow::computeir::flatbuf::Expression *GetSizePrefixedExpression(const void *buf) { - return flatbuffers::GetSizePrefixedRoot(buf); -} - -inline bool VerifyExpressionBuffer( - 
flatbuffers::Verifier &verifier) { - return verifier.VerifyBuffer(nullptr); -} - -inline bool VerifySizePrefixedExpressionBuffer( - flatbuffers::Verifier &verifier) { - return verifier.VerifySizePrefixedBuffer(nullptr); -} - -inline void FinishExpressionBuffer( - flatbuffers::FlatBufferBuilder &fbb, - flatbuffers::Offset root) { - fbb.Finish(root); -} - -inline void FinishSizePrefixedExpressionBuffer( - flatbuffers::FlatBufferBuilder &fbb, - flatbuffers::Offset root) { - fbb.FinishSizePrefixed(root); -} - -} // namespace flatbuf -} // namespace computeir -} // namespace arrow -} // namespace apache -} // namespace org - -#endif // FLATBUFFERS_GENERATED_EXPRESSION_ORG_APACHE_ARROW_COMPUTEIR_FLATBUF_H_ diff --git a/cpp/src/generated/Literal_generated.h b/cpp/src/generated/Literal_generated.h deleted file mode 100644 index ea095a8244783..0000000000000 --- a/cpp/src/generated/Literal_generated.h +++ /dev/null @@ -1,2037 +0,0 @@ -// automatically generated by the FlatBuffers compiler, do not modify - - -#ifndef FLATBUFFERS_GENERATED_LITERAL_ORG_APACHE_ARROW_COMPUTEIR_FLATBUF_H_ -#define FLATBUFFERS_GENERATED_LITERAL_ORG_APACHE_ARROW_COMPUTEIR_FLATBUF_H_ - -#include "flatbuffers/flatbuffers.h" - -#include "Schema_generated.h" - -namespace org { -namespace apache { -namespace arrow { -namespace computeir { -namespace flatbuf { - -struct ListLiteral; -struct ListLiteralBuilder; - -struct StructLiteral; -struct StructLiteralBuilder; - -struct KeyValue; -struct KeyValueBuilder; - -struct MapLiteral; -struct MapLiteralBuilder; - -struct Int8Literal; -struct Int8LiteralBuilder; - -struct Int16Literal; -struct Int16LiteralBuilder; - -struct Int32Literal; -struct Int32LiteralBuilder; - -struct Int64Literal; -struct Int64LiteralBuilder; - -struct UInt8Literal; -struct UInt8LiteralBuilder; - -struct UInt16Literal; -struct UInt16LiteralBuilder; - -struct UInt32Literal; -struct UInt32LiteralBuilder; - -struct UInt64Literal; -struct UInt64LiteralBuilder; - -struct Float16Literal; -struct Float16LiteralBuilder; - -struct Float32Literal; -struct Float32LiteralBuilder; - -struct Float64Literal; -struct Float64LiteralBuilder; - -struct DecimalLiteral; -struct DecimalLiteralBuilder; - -struct BooleanLiteral; -struct BooleanLiteralBuilder; - -struct DateLiteral; -struct DateLiteralBuilder; - -struct TimeLiteral; -struct TimeLiteralBuilder; - -struct TimestampLiteral; -struct TimestampLiteralBuilder; - -struct IntervalLiteralMonths; -struct IntervalLiteralMonthsBuilder; - -struct IntervalLiteralDaysMilliseconds; -struct IntervalLiteralDaysMillisecondsBuilder; - -struct IntervalLiteral; -struct IntervalLiteralBuilder; - -struct DurationLiteral; -struct DurationLiteralBuilder; - -struct BinaryLiteral; -struct BinaryLiteralBuilder; - -struct FixedSizeBinaryLiteral; -struct FixedSizeBinaryLiteralBuilder; - -struct StringLiteral; -struct StringLiteralBuilder; - -struct Literal; -struct LiteralBuilder; - -enum class IntervalLiteralImpl : uint8_t { - NONE = 0, - IntervalLiteralMonths = 1, - IntervalLiteralDaysMilliseconds = 2, - MIN = NONE, - MAX = IntervalLiteralDaysMilliseconds -}; - -inline const IntervalLiteralImpl (&EnumValuesIntervalLiteralImpl())[3] { - static const IntervalLiteralImpl values[] = { - IntervalLiteralImpl::NONE, - IntervalLiteralImpl::IntervalLiteralMonths, - IntervalLiteralImpl::IntervalLiteralDaysMilliseconds - }; - return values; -} - -inline const char * const *EnumNamesIntervalLiteralImpl() { - static const char * const names[4] = { - "NONE", - "IntervalLiteralMonths", - 
"IntervalLiteralDaysMilliseconds", - nullptr - }; - return names; -} - -inline const char *EnumNameIntervalLiteralImpl(IntervalLiteralImpl e) { - if (flatbuffers::IsOutRange(e, IntervalLiteralImpl::NONE, IntervalLiteralImpl::IntervalLiteralDaysMilliseconds)) return ""; - const size_t index = static_cast(e); - return EnumNamesIntervalLiteralImpl()[index]; -} - -template struct IntervalLiteralImplTraits { - static const IntervalLiteralImpl enum_value = IntervalLiteralImpl::NONE; -}; - -template<> struct IntervalLiteralImplTraits { - static const IntervalLiteralImpl enum_value = IntervalLiteralImpl::IntervalLiteralMonths; -}; - -template<> struct IntervalLiteralImplTraits { - static const IntervalLiteralImpl enum_value = IntervalLiteralImpl::IntervalLiteralDaysMilliseconds; -}; - -bool VerifyIntervalLiteralImpl(flatbuffers::Verifier &verifier, const void *obj, IntervalLiteralImpl type); -bool VerifyIntervalLiteralImplVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types); - -enum class LiteralImpl : uint8_t { - NONE = 0, - BooleanLiteral = 1, - Int8Literal = 2, - Int16Literal = 3, - Int32Literal = 4, - Int64Literal = 5, - UInt8Literal = 6, - UInt16Literal = 7, - UInt32Literal = 8, - UInt64Literal = 9, - DateLiteral = 10, - TimeLiteral = 11, - TimestampLiteral = 12, - IntervalLiteral = 13, - DurationLiteral = 14, - DecimalLiteral = 15, - Float16Literal = 16, - Float32Literal = 17, - Float64Literal = 18, - ListLiteral = 19, - StructLiteral = 20, - MapLiteral = 21, - StringLiteral = 22, - BinaryLiteral = 23, - FixedSizeBinaryLiteral = 24, - MIN = NONE, - MAX = FixedSizeBinaryLiteral -}; - -inline const LiteralImpl (&EnumValuesLiteralImpl())[25] { - static const LiteralImpl values[] = { - LiteralImpl::NONE, - LiteralImpl::BooleanLiteral, - LiteralImpl::Int8Literal, - LiteralImpl::Int16Literal, - LiteralImpl::Int32Literal, - LiteralImpl::Int64Literal, - LiteralImpl::UInt8Literal, - LiteralImpl::UInt16Literal, - LiteralImpl::UInt32Literal, - LiteralImpl::UInt64Literal, - LiteralImpl::DateLiteral, - LiteralImpl::TimeLiteral, - LiteralImpl::TimestampLiteral, - LiteralImpl::IntervalLiteral, - LiteralImpl::DurationLiteral, - LiteralImpl::DecimalLiteral, - LiteralImpl::Float16Literal, - LiteralImpl::Float32Literal, - LiteralImpl::Float64Literal, - LiteralImpl::ListLiteral, - LiteralImpl::StructLiteral, - LiteralImpl::MapLiteral, - LiteralImpl::StringLiteral, - LiteralImpl::BinaryLiteral, - LiteralImpl::FixedSizeBinaryLiteral - }; - return values; -} - -inline const char * const *EnumNamesLiteralImpl() { - static const char * const names[26] = { - "NONE", - "BooleanLiteral", - "Int8Literal", - "Int16Literal", - "Int32Literal", - "Int64Literal", - "UInt8Literal", - "UInt16Literal", - "UInt32Literal", - "UInt64Literal", - "DateLiteral", - "TimeLiteral", - "TimestampLiteral", - "IntervalLiteral", - "DurationLiteral", - "DecimalLiteral", - "Float16Literal", - "Float32Literal", - "Float64Literal", - "ListLiteral", - "StructLiteral", - "MapLiteral", - "StringLiteral", - "BinaryLiteral", - "FixedSizeBinaryLiteral", - nullptr - }; - return names; -} - -inline const char *EnumNameLiteralImpl(LiteralImpl e) { - if (flatbuffers::IsOutRange(e, LiteralImpl::NONE, LiteralImpl::FixedSizeBinaryLiteral)) return ""; - const size_t index = static_cast(e); - return EnumNamesLiteralImpl()[index]; -} - -template struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::NONE; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl 
enum_value = LiteralImpl::BooleanLiteral; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::Int8Literal; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::Int16Literal; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::Int32Literal; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::Int64Literal; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::UInt8Literal; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::UInt16Literal; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::UInt32Literal; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::UInt64Literal; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::DateLiteral; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::TimeLiteral; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::TimestampLiteral; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::IntervalLiteral; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::DurationLiteral; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::DecimalLiteral; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::Float16Literal; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::Float32Literal; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::Float64Literal; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::ListLiteral; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::StructLiteral; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::MapLiteral; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::StringLiteral; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::BinaryLiteral; -}; - -template<> struct LiteralImplTraits { - static const LiteralImpl enum_value = LiteralImpl::FixedSizeBinaryLiteral; -}; - -bool VerifyLiteralImpl(flatbuffers::Verifier &verifier, const void *obj, LiteralImpl type); -bool VerifyLiteralImplVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types); - -struct ListLiteral FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef ListLiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUES = 4 - }; - const flatbuffers::Vector> *values() const { - return GetPointer> *>(VT_VALUES); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_VALUES) && - verifier.VerifyVector(values()) && - verifier.VerifyVectorOfTables(values()) && - verifier.EndTable(); - } -}; - -struct ListLiteralBuilder { - typedef ListLiteral Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t 
start_; - void add_values(flatbuffers::Offset>> values) { - fbb_.AddOffset(ListLiteral::VT_VALUES, values); - } - explicit ListLiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - ListLiteralBuilder &operator=(const ListLiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, ListLiteral::VT_VALUES); - return o; - } -}; - -inline flatbuffers::Offset CreateListLiteral( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset>> values = 0) { - ListLiteralBuilder builder_(_fbb); - builder_.add_values(values); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreateListLiteralDirect( - flatbuffers::FlatBufferBuilder &_fbb, - const std::vector> *values = nullptr) { - auto values__ = values ? _fbb.CreateVector>(*values) : 0; - return org::apache::arrow::computeir::flatbuf::CreateListLiteral( - _fbb, - values__); -} - -struct StructLiteral FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef StructLiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUES = 4 - }; - /// Values for each struct field; the order must match the order of fields - /// in the `type` field of `Literal`. - const flatbuffers::Vector> *values() const { - return GetPointer> *>(VT_VALUES); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_VALUES) && - verifier.VerifyVector(values()) && - verifier.VerifyVectorOfTables(values()) && - verifier.EndTable(); - } -}; - -struct StructLiteralBuilder { - typedef StructLiteral Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_values(flatbuffers::Offset>> values) { - fbb_.AddOffset(StructLiteral::VT_VALUES, values); - } - explicit StructLiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - StructLiteralBuilder &operator=(const StructLiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, StructLiteral::VT_VALUES); - return o; - } -}; - -inline flatbuffers::Offset CreateStructLiteral( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset>> values = 0) { - StructLiteralBuilder builder_(_fbb); - builder_.add_values(values); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreateStructLiteralDirect( - flatbuffers::FlatBufferBuilder &_fbb, - const std::vector> *values = nullptr) { - auto values__ = values ? 
_fbb.CreateVector>(*values) : 0; - return org::apache::arrow::computeir::flatbuf::CreateStructLiteral( - _fbb, - values__); -} - -struct KeyValue FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef KeyValueBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_KEY = 4, - VT_VALUE = 6 - }; - const org::apache::arrow::computeir::flatbuf::Literal *key() const { - return GetPointer(VT_KEY); - } - const org::apache::arrow::computeir::flatbuf::Literal *value() const { - return GetPointer(VT_VALUE); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_KEY) && - verifier.VerifyTable(key()) && - VerifyOffsetRequired(verifier, VT_VALUE) && - verifier.VerifyTable(value()) && - verifier.EndTable(); - } -}; - -struct KeyValueBuilder { - typedef KeyValue Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_key(flatbuffers::Offset key) { - fbb_.AddOffset(KeyValue::VT_KEY, key); - } - void add_value(flatbuffers::Offset value) { - fbb_.AddOffset(KeyValue::VT_VALUE, value); - } - explicit KeyValueBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - KeyValueBuilder &operator=(const KeyValueBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, KeyValue::VT_KEY); - fbb_.Required(o, KeyValue::VT_VALUE); - return o; - } -}; - -inline flatbuffers::Offset CreateKeyValue( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset key = 0, - flatbuffers::Offset value = 0) { - KeyValueBuilder builder_(_fbb); - builder_.add_value(value); - builder_.add_key(key); - return builder_.Finish(); -} - -struct MapLiteral FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef MapLiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUES = 4 - }; - const flatbuffers::Vector> *values() const { - return GetPointer> *>(VT_VALUES); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_VALUES) && - verifier.VerifyVector(values()) && - verifier.VerifyVectorOfTables(values()) && - verifier.EndTable(); - } -}; - -struct MapLiteralBuilder { - typedef MapLiteral Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_values(flatbuffers::Offset>> values) { - fbb_.AddOffset(MapLiteral::VT_VALUES, values); - } - explicit MapLiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - MapLiteralBuilder &operator=(const MapLiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, MapLiteral::VT_VALUES); - return o; - } -}; - -inline flatbuffers::Offset CreateMapLiteral( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset>> values = 0) { - MapLiteralBuilder builder_(_fbb); - builder_.add_values(values); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreateMapLiteralDirect( - flatbuffers::FlatBufferBuilder &_fbb, - const std::vector> *values = nullptr) { - auto values__ = values ? 
_fbb.CreateVector>(*values) : 0; - return org::apache::arrow::computeir::flatbuf::CreateMapLiteral( - _fbb, - values__); -} - -struct Int8Literal FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef Int8LiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE = 4 - }; - int8_t value() const { - return GetField(VT_VALUE, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_VALUE) && - verifier.EndTable(); - } -}; - -struct Int8LiteralBuilder { - typedef Int8Literal Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value(int8_t value) { - fbb_.AddElement(Int8Literal::VT_VALUE, value, 0); - } - explicit Int8LiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - Int8LiteralBuilder &operator=(const Int8LiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateInt8Literal( - flatbuffers::FlatBufferBuilder &_fbb, - int8_t value = 0) { - Int8LiteralBuilder builder_(_fbb); - builder_.add_value(value); - return builder_.Finish(); -} - -struct Int16Literal FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef Int16LiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE = 4 - }; - int16_t value() const { - return GetField(VT_VALUE, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_VALUE) && - verifier.EndTable(); - } -}; - -struct Int16LiteralBuilder { - typedef Int16Literal Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value(int16_t value) { - fbb_.AddElement(Int16Literal::VT_VALUE, value, 0); - } - explicit Int16LiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - Int16LiteralBuilder &operator=(const Int16LiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateInt16Literal( - flatbuffers::FlatBufferBuilder &_fbb, - int16_t value = 0) { - Int16LiteralBuilder builder_(_fbb); - builder_.add_value(value); - return builder_.Finish(); -} - -struct Int32Literal FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef Int32LiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE = 4 - }; - int32_t value() const { - return GetField(VT_VALUE, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_VALUE) && - verifier.EndTable(); - } -}; - -struct Int32LiteralBuilder { - typedef Int32Literal Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value(int32_t value) { - fbb_.AddElement(Int32Literal::VT_VALUE, value, 0); - } - explicit Int32LiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - Int32LiteralBuilder &operator=(const Int32LiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateInt32Literal( - flatbuffers::FlatBufferBuilder &_fbb, - int32_t value = 0) 
{ - Int32LiteralBuilder builder_(_fbb); - builder_.add_value(value); - return builder_.Finish(); -} - -struct Int64Literal FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef Int64LiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE = 4 - }; - int64_t value() const { - return GetField(VT_VALUE, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_VALUE) && - verifier.EndTable(); - } -}; - -struct Int64LiteralBuilder { - typedef Int64Literal Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value(int64_t value) { - fbb_.AddElement(Int64Literal::VT_VALUE, value, 0); - } - explicit Int64LiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - Int64LiteralBuilder &operator=(const Int64LiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateInt64Literal( - flatbuffers::FlatBufferBuilder &_fbb, - int64_t value = 0) { - Int64LiteralBuilder builder_(_fbb); - builder_.add_value(value); - return builder_.Finish(); -} - -struct UInt8Literal FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef UInt8LiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE = 4 - }; - uint8_t value() const { - return GetField(VT_VALUE, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_VALUE) && - verifier.EndTable(); - } -}; - -struct UInt8LiteralBuilder { - typedef UInt8Literal Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value(uint8_t value) { - fbb_.AddElement(UInt8Literal::VT_VALUE, value, 0); - } - explicit UInt8LiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - UInt8LiteralBuilder &operator=(const UInt8LiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateUInt8Literal( - flatbuffers::FlatBufferBuilder &_fbb, - uint8_t value = 0) { - UInt8LiteralBuilder builder_(_fbb); - builder_.add_value(value); - return builder_.Finish(); -} - -struct UInt16Literal FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef UInt16LiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE = 4 - }; - uint16_t value() const { - return GetField(VT_VALUE, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_VALUE) && - verifier.EndTable(); - } -}; - -struct UInt16LiteralBuilder { - typedef UInt16Literal Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value(uint16_t value) { - fbb_.AddElement(UInt16Literal::VT_VALUE, value, 0); - } - explicit UInt16LiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - UInt16LiteralBuilder &operator=(const UInt16LiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateUInt16Literal( - flatbuffers::FlatBufferBuilder &_fbb, - uint16_t value = 0) 
{ - UInt16LiteralBuilder builder_(_fbb); - builder_.add_value(value); - return builder_.Finish(); -} - -struct UInt32Literal FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef UInt32LiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE = 4 - }; - uint32_t value() const { - return GetField(VT_VALUE, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_VALUE) && - verifier.EndTable(); - } -}; - -struct UInt32LiteralBuilder { - typedef UInt32Literal Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value(uint32_t value) { - fbb_.AddElement(UInt32Literal::VT_VALUE, value, 0); - } - explicit UInt32LiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - UInt32LiteralBuilder &operator=(const UInt32LiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateUInt32Literal( - flatbuffers::FlatBufferBuilder &_fbb, - uint32_t value = 0) { - UInt32LiteralBuilder builder_(_fbb); - builder_.add_value(value); - return builder_.Finish(); -} - -struct UInt64Literal FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef UInt64LiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE = 4 - }; - uint64_t value() const { - return GetField(VT_VALUE, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_VALUE) && - verifier.EndTable(); - } -}; - -struct UInt64LiteralBuilder { - typedef UInt64Literal Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value(uint64_t value) { - fbb_.AddElement(UInt64Literal::VT_VALUE, value, 0); - } - explicit UInt64LiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - UInt64LiteralBuilder &operator=(const UInt64LiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateUInt64Literal( - flatbuffers::FlatBufferBuilder &_fbb, - uint64_t value = 0) { - UInt64LiteralBuilder builder_(_fbb); - builder_.add_value(value); - return builder_.Finish(); -} - -struct Float16Literal FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef Float16LiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE = 4 - }; - uint16_t value() const { - return GetField(VT_VALUE, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_VALUE) && - verifier.EndTable(); - } -}; - -struct Float16LiteralBuilder { - typedef Float16Literal Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value(uint16_t value) { - fbb_.AddElement(Float16Literal::VT_VALUE, value, 0); - } - explicit Float16LiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - Float16LiteralBuilder &operator=(const Float16LiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateFloat16Literal( - 
flatbuffers::FlatBufferBuilder &_fbb, - uint16_t value = 0) { - Float16LiteralBuilder builder_(_fbb); - builder_.add_value(value); - return builder_.Finish(); -} - -struct Float32Literal FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef Float32LiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE = 4 - }; - float value() const { - return GetField(VT_VALUE, 0.0f); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_VALUE) && - verifier.EndTable(); - } -}; - -struct Float32LiteralBuilder { - typedef Float32Literal Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value(float value) { - fbb_.AddElement(Float32Literal::VT_VALUE, value, 0.0f); - } - explicit Float32LiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - Float32LiteralBuilder &operator=(const Float32LiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateFloat32Literal( - flatbuffers::FlatBufferBuilder &_fbb, - float value = 0.0f) { - Float32LiteralBuilder builder_(_fbb); - builder_.add_value(value); - return builder_.Finish(); -} - -struct Float64Literal FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef Float64LiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE = 4 - }; - double value() const { - return GetField(VT_VALUE, 0.0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_VALUE) && - verifier.EndTable(); - } -}; - -struct Float64LiteralBuilder { - typedef Float64Literal Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value(double value) { - fbb_.AddElement(Float64Literal::VT_VALUE, value, 0.0); - } - explicit Float64LiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - Float64LiteralBuilder &operator=(const Float64LiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateFloat64Literal( - flatbuffers::FlatBufferBuilder &_fbb, - double value = 0.0) { - Float64LiteralBuilder builder_(_fbb); - builder_.add_value(value); - return builder_.Finish(); -} - -struct DecimalLiteral FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef DecimalLiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE = 4 - }; - /// Bytes of a Decimal value; bytes must be in little-endian order. 
- const flatbuffers::Vector *value() const { - return GetPointer *>(VT_VALUE); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_VALUE) && - verifier.VerifyVector(value()) && - verifier.EndTable(); - } -}; - -struct DecimalLiteralBuilder { - typedef DecimalLiteral Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value(flatbuffers::Offset> value) { - fbb_.AddOffset(DecimalLiteral::VT_VALUE, value); - } - explicit DecimalLiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - DecimalLiteralBuilder &operator=(const DecimalLiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, DecimalLiteral::VT_VALUE); - return o; - } -}; - -inline flatbuffers::Offset CreateDecimalLiteral( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset> value = 0) { - DecimalLiteralBuilder builder_(_fbb); - builder_.add_value(value); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreateDecimalLiteralDirect( - flatbuffers::FlatBufferBuilder &_fbb, - const std::vector *value = nullptr) { - auto value__ = value ? _fbb.CreateVector(*value) : 0; - return org::apache::arrow::computeir::flatbuf::CreateDecimalLiteral( - _fbb, - value__); -} - -struct BooleanLiteral FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef BooleanLiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE = 4 - }; - bool value() const { - return GetField(VT_VALUE, 0) != 0; - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_VALUE) && - verifier.EndTable(); - } -}; - -struct BooleanLiteralBuilder { - typedef BooleanLiteral Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value(bool value) { - fbb_.AddElement(BooleanLiteral::VT_VALUE, static_cast(value), 0); - } - explicit BooleanLiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - BooleanLiteralBuilder &operator=(const BooleanLiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateBooleanLiteral( - flatbuffers::FlatBufferBuilder &_fbb, - bool value = false) { - BooleanLiteralBuilder builder_(_fbb); - builder_.add_value(value); - return builder_.Finish(); -} - -struct DateLiteral FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef DateLiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE = 4 - }; - int64_t value() const { - return GetField(VT_VALUE, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_VALUE) && - verifier.EndTable(); - } -}; - -struct DateLiteralBuilder { - typedef DateLiteral Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value(int64_t value) { - fbb_.AddElement(DateLiteral::VT_VALUE, value, 0); - } - explicit DateLiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - DateLiteralBuilder &operator=(const DateLiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = 
flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateDateLiteral( - flatbuffers::FlatBufferBuilder &_fbb, - int64_t value = 0) { - DateLiteralBuilder builder_(_fbb); - builder_.add_value(value); - return builder_.Finish(); -} - -struct TimeLiteral FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef TimeLiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE = 4 - }; - int64_t value() const { - return GetField(VT_VALUE, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_VALUE) && - verifier.EndTable(); - } -}; - -struct TimeLiteralBuilder { - typedef TimeLiteral Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value(int64_t value) { - fbb_.AddElement(TimeLiteral::VT_VALUE, value, 0); - } - explicit TimeLiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - TimeLiteralBuilder &operator=(const TimeLiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateTimeLiteral( - flatbuffers::FlatBufferBuilder &_fbb, - int64_t value = 0) { - TimeLiteralBuilder builder_(_fbb); - builder_.add_value(value); - return builder_.Finish(); -} - -struct TimestampLiteral FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef TimestampLiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE = 4 - }; - int64_t value() const { - return GetField(VT_VALUE, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_VALUE) && - verifier.EndTable(); - } -}; - -struct TimestampLiteralBuilder { - typedef TimestampLiteral Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value(int64_t value) { - fbb_.AddElement(TimestampLiteral::VT_VALUE, value, 0); - } - explicit TimestampLiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - TimestampLiteralBuilder &operator=(const TimestampLiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateTimestampLiteral( - flatbuffers::FlatBufferBuilder &_fbb, - int64_t value = 0) { - TimestampLiteralBuilder builder_(_fbb); - builder_.add_value(value); - return builder_.Finish(); -} - -struct IntervalLiteralMonths FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef IntervalLiteralMonthsBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_MONTHS = 4 - }; - int32_t months() const { - return GetField(VT_MONTHS, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_MONTHS) && - verifier.EndTable(); - } -}; - -struct IntervalLiteralMonthsBuilder { - typedef IntervalLiteralMonths Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_months(int32_t months) { - fbb_.AddElement(IntervalLiteralMonths::VT_MONTHS, months, 0); - } - explicit IntervalLiteralMonthsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - IntervalLiteralMonthsBuilder &operator=(const 
IntervalLiteralMonthsBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateIntervalLiteralMonths( - flatbuffers::FlatBufferBuilder &_fbb, - int32_t months = 0) { - IntervalLiteralMonthsBuilder builder_(_fbb); - builder_.add_months(months); - return builder_.Finish(); -} - -struct IntervalLiteralDaysMilliseconds FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef IntervalLiteralDaysMillisecondsBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_DAYS = 4, - VT_MILLISECONDS = 6 - }; - int32_t days() const { - return GetField(VT_DAYS, 0); - } - int32_t milliseconds() const { - return GetField(VT_MILLISECONDS, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_DAYS) && - VerifyField(verifier, VT_MILLISECONDS) && - verifier.EndTable(); - } -}; - -struct IntervalLiteralDaysMillisecondsBuilder { - typedef IntervalLiteralDaysMilliseconds Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_days(int32_t days) { - fbb_.AddElement(IntervalLiteralDaysMilliseconds::VT_DAYS, days, 0); - } - void add_milliseconds(int32_t milliseconds) { - fbb_.AddElement(IntervalLiteralDaysMilliseconds::VT_MILLISECONDS, milliseconds, 0); - } - explicit IntervalLiteralDaysMillisecondsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - IntervalLiteralDaysMillisecondsBuilder &operator=(const IntervalLiteralDaysMillisecondsBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateIntervalLiteralDaysMilliseconds( - flatbuffers::FlatBufferBuilder &_fbb, - int32_t days = 0, - int32_t milliseconds = 0) { - IntervalLiteralDaysMillisecondsBuilder builder_(_fbb); - builder_.add_milliseconds(milliseconds); - builder_.add_days(days); - return builder_.Finish(); -} - -struct IntervalLiteral FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef IntervalLiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE_TYPE = 4, - VT_VALUE = 6 - }; - org::apache::arrow::computeir::flatbuf::IntervalLiteralImpl value_type() const { - return static_cast(GetField(VT_VALUE_TYPE, 0)); - } - const void *value() const { - return GetPointer(VT_VALUE); - } - template const T *value_as() const; - const org::apache::arrow::computeir::flatbuf::IntervalLiteralMonths *value_as_IntervalLiteralMonths() const { - return value_type() == org::apache::arrow::computeir::flatbuf::IntervalLiteralImpl::IntervalLiteralMonths ? static_cast(value()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::IntervalLiteralDaysMilliseconds *value_as_IntervalLiteralDaysMilliseconds() const { - return value_type() == org::apache::arrow::computeir::flatbuf::IntervalLiteralImpl::IntervalLiteralDaysMilliseconds ? 
static_cast(value()) : nullptr; - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_VALUE_TYPE) && - VerifyOffsetRequired(verifier, VT_VALUE) && - VerifyIntervalLiteralImpl(verifier, value(), value_type()) && - verifier.EndTable(); - } -}; - -template<> inline const org::apache::arrow::computeir::flatbuf::IntervalLiteralMonths *IntervalLiteral::value_as() const { - return value_as_IntervalLiteralMonths(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::IntervalLiteralDaysMilliseconds *IntervalLiteral::value_as() const { - return value_as_IntervalLiteralDaysMilliseconds(); -} - -struct IntervalLiteralBuilder { - typedef IntervalLiteral Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value_type(org::apache::arrow::computeir::flatbuf::IntervalLiteralImpl value_type) { - fbb_.AddElement(IntervalLiteral::VT_VALUE_TYPE, static_cast(value_type), 0); - } - void add_value(flatbuffers::Offset value) { - fbb_.AddOffset(IntervalLiteral::VT_VALUE, value); - } - explicit IntervalLiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - IntervalLiteralBuilder &operator=(const IntervalLiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, IntervalLiteral::VT_VALUE); - return o; - } -}; - -inline flatbuffers::Offset CreateIntervalLiteral( - flatbuffers::FlatBufferBuilder &_fbb, - org::apache::arrow::computeir::flatbuf::IntervalLiteralImpl value_type = org::apache::arrow::computeir::flatbuf::IntervalLiteralImpl::NONE, - flatbuffers::Offset value = 0) { - IntervalLiteralBuilder builder_(_fbb); - builder_.add_value(value); - builder_.add_value_type(value_type); - return builder_.Finish(); -} - -struct DurationLiteral FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef DurationLiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE = 4 - }; - int64_t value() const { - return GetField(VT_VALUE, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_VALUE) && - verifier.EndTable(); - } -}; - -struct DurationLiteralBuilder { - typedef DurationLiteral Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value(int64_t value) { - fbb_.AddElement(DurationLiteral::VT_VALUE, value, 0); - } - explicit DurationLiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - DurationLiteralBuilder &operator=(const DurationLiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateDurationLiteral( - flatbuffers::FlatBufferBuilder &_fbb, - int64_t value = 0) { - DurationLiteralBuilder builder_(_fbb); - builder_.add_value(value); - return builder_.Finish(); -} - -struct BinaryLiteral FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef BinaryLiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE = 4 - }; - const flatbuffers::Vector *value() const { - return GetPointer *>(VT_VALUE); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_VALUE) && - verifier.VerifyVector(value()) 
&& - verifier.EndTable(); - } -}; - -struct BinaryLiteralBuilder { - typedef BinaryLiteral Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value(flatbuffers::Offset> value) { - fbb_.AddOffset(BinaryLiteral::VT_VALUE, value); - } - explicit BinaryLiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - BinaryLiteralBuilder &operator=(const BinaryLiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, BinaryLiteral::VT_VALUE); - return o; - } -}; - -inline flatbuffers::Offset CreateBinaryLiteral( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset> value = 0) { - BinaryLiteralBuilder builder_(_fbb); - builder_.add_value(value); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreateBinaryLiteralDirect( - flatbuffers::FlatBufferBuilder &_fbb, - const std::vector *value = nullptr) { - auto value__ = value ? _fbb.CreateVector(*value) : 0; - return org::apache::arrow::computeir::flatbuf::CreateBinaryLiteral( - _fbb, - value__); -} - -struct FixedSizeBinaryLiteral FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef FixedSizeBinaryLiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE = 4 - }; - const flatbuffers::Vector *value() const { - return GetPointer *>(VT_VALUE); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_VALUE) && - verifier.VerifyVector(value()) && - verifier.EndTable(); - } -}; - -struct FixedSizeBinaryLiteralBuilder { - typedef FixedSizeBinaryLiteral Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value(flatbuffers::Offset> value) { - fbb_.AddOffset(FixedSizeBinaryLiteral::VT_VALUE, value); - } - explicit FixedSizeBinaryLiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - FixedSizeBinaryLiteralBuilder &operator=(const FixedSizeBinaryLiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, FixedSizeBinaryLiteral::VT_VALUE); - return o; - } -}; - -inline flatbuffers::Offset CreateFixedSizeBinaryLiteral( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset> value = 0) { - FixedSizeBinaryLiteralBuilder builder_(_fbb); - builder_.add_value(value); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreateFixedSizeBinaryLiteralDirect( - flatbuffers::FlatBufferBuilder &_fbb, - const std::vector *value = nullptr) { - auto value__ = value ? 
_fbb.CreateVector(*value) : 0; - return org::apache::arrow::computeir::flatbuf::CreateFixedSizeBinaryLiteral( - _fbb, - value__); -} - -struct StringLiteral FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef StringLiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_VALUE = 4 - }; - const flatbuffers::String *value() const { - return GetPointer(VT_VALUE); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_VALUE) && - verifier.VerifyString(value()) && - verifier.EndTable(); - } -}; - -struct StringLiteralBuilder { - typedef StringLiteral Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_value(flatbuffers::Offset value) { - fbb_.AddOffset(StringLiteral::VT_VALUE, value); - } - explicit StringLiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - StringLiteralBuilder &operator=(const StringLiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, StringLiteral::VT_VALUE); - return o; - } -}; - -inline flatbuffers::Offset CreateStringLiteral( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset value = 0) { - StringLiteralBuilder builder_(_fbb); - builder_.add_value(value); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreateStringLiteralDirect( - flatbuffers::FlatBufferBuilder &_fbb, - const char *value = nullptr) { - auto value__ = value ? _fbb.CreateString(value) : 0; - return org::apache::arrow::computeir::flatbuf::CreateStringLiteral( - _fbb, - value__); -} - -struct Literal FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef LiteralBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_IMPL_TYPE = 4, - VT_IMPL = 6, - VT_TYPE = 8 - }; - org::apache::arrow::computeir::flatbuf::LiteralImpl impl_type() const { - return static_cast(GetField(VT_IMPL_TYPE, 0)); - } - /// Literal value data; for null literals do not include this field. - const void *impl() const { - return GetPointer(VT_IMPL); - } - template const T *impl_as() const; - const org::apache::arrow::computeir::flatbuf::BooleanLiteral *impl_as_BooleanLiteral() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::BooleanLiteral ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::Int8Literal *impl_as_Int8Literal() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::Int8Literal ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::Int16Literal *impl_as_Int16Literal() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::Int16Literal ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::Int32Literal *impl_as_Int32Literal() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::Int32Literal ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::Int64Literal *impl_as_Int64Literal() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::Int64Literal ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::UInt8Literal *impl_as_UInt8Literal() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::UInt8Literal ? 
static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::UInt16Literal *impl_as_UInt16Literal() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::UInt16Literal ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::UInt32Literal *impl_as_UInt32Literal() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::UInt32Literal ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::UInt64Literal *impl_as_UInt64Literal() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::UInt64Literal ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::DateLiteral *impl_as_DateLiteral() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::DateLiteral ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::TimeLiteral *impl_as_TimeLiteral() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::TimeLiteral ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::TimestampLiteral *impl_as_TimestampLiteral() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::TimestampLiteral ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::IntervalLiteral *impl_as_IntervalLiteral() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::IntervalLiteral ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::DurationLiteral *impl_as_DurationLiteral() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::DurationLiteral ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::DecimalLiteral *impl_as_DecimalLiteral() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::DecimalLiteral ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::Float16Literal *impl_as_Float16Literal() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::Float16Literal ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::Float32Literal *impl_as_Float32Literal() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::Float32Literal ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::Float64Literal *impl_as_Float64Literal() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::Float64Literal ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::ListLiteral *impl_as_ListLiteral() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::ListLiteral ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::StructLiteral *impl_as_StructLiteral() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::StructLiteral ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::MapLiteral *impl_as_MapLiteral() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::MapLiteral ? 
static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::StringLiteral *impl_as_StringLiteral() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::StringLiteral ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::BinaryLiteral *impl_as_BinaryLiteral() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::BinaryLiteral ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::FixedSizeBinaryLiteral *impl_as_FixedSizeBinaryLiteral() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::LiteralImpl::FixedSizeBinaryLiteral ? static_cast(impl()) : nullptr; - } - /// Type of the literal value. This must match `impl`. - const org::apache::arrow::flatbuf::Field *type() const { - return GetPointer(VT_TYPE); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_IMPL_TYPE) && - VerifyOffset(verifier, VT_IMPL) && - VerifyLiteralImpl(verifier, impl(), impl_type()) && - VerifyOffsetRequired(verifier, VT_TYPE) && - verifier.VerifyTable(type()) && - verifier.EndTable(); - } -}; - -template<> inline const org::apache::arrow::computeir::flatbuf::BooleanLiteral *Literal::impl_as() const { - return impl_as_BooleanLiteral(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::Int8Literal *Literal::impl_as() const { - return impl_as_Int8Literal(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::Int16Literal *Literal::impl_as() const { - return impl_as_Int16Literal(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::Int32Literal *Literal::impl_as() const { - return impl_as_Int32Literal(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::Int64Literal *Literal::impl_as() const { - return impl_as_Int64Literal(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::UInt8Literal *Literal::impl_as() const { - return impl_as_UInt8Literal(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::UInt16Literal *Literal::impl_as() const { - return impl_as_UInt16Literal(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::UInt32Literal *Literal::impl_as() const { - return impl_as_UInt32Literal(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::UInt64Literal *Literal::impl_as() const { - return impl_as_UInt64Literal(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::DateLiteral *Literal::impl_as() const { - return impl_as_DateLiteral(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::TimeLiteral *Literal::impl_as() const { - return impl_as_TimeLiteral(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::TimestampLiteral *Literal::impl_as() const { - return impl_as_TimestampLiteral(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::IntervalLiteral *Literal::impl_as() const { - return impl_as_IntervalLiteral(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::DurationLiteral *Literal::impl_as() const { - return impl_as_DurationLiteral(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::DecimalLiteral *Literal::impl_as() const { - return impl_as_DecimalLiteral(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::Float16Literal *Literal::impl_as() const { - return impl_as_Float16Literal(); -} - 
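// Illustrative sketch, not part of this diff: roughly how the generated Literal
// union API being removed above was meant to be used before deletion. It assumes
// the deleted Literal_generated.h is still available on the include path;
// `MakeInt32Literal` and the `type` parameter are hypothetical names introduced
// here for illustration, not identifiers from the Arrow sources.
#include "Literal_generated.h"  // the header removed by this diff

flatbuffers::Offset<org::apache::arrow::computeir::flatbuf::Literal>
MakeInt32Literal(flatbuffers::FlatBufferBuilder &fbb, int32_t value,
                 flatbuffers::Offset<org::apache::arrow::flatbuf::Field> type) {
  namespace ir = org::apache::arrow::computeir::flatbuf;
  // Build the concrete value table first...
  auto impl = ir::CreateInt32Literal(fbb, value);
  // ...then wrap it in the Literal union: tag it with LiteralImpl::Int32Literal,
  // pass the payload as Offset<void>, and attach the required `type` Field.
  return ir::CreateLiteral(fbb, ir::LiteralImpl::Int32Literal, impl.Union(), type);
}
// When reading back, Literal::impl_as<ir::Int32Literal>() (specialized above)
// returns the payload, or nullptr if the union tag does not match.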
-template<> inline const org::apache::arrow::computeir::flatbuf::Float32Literal *Literal::impl_as() const { - return impl_as_Float32Literal(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::Float64Literal *Literal::impl_as() const { - return impl_as_Float64Literal(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::ListLiteral *Literal::impl_as() const { - return impl_as_ListLiteral(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::StructLiteral *Literal::impl_as() const { - return impl_as_StructLiteral(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::MapLiteral *Literal::impl_as() const { - return impl_as_MapLiteral(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::StringLiteral *Literal::impl_as() const { - return impl_as_StringLiteral(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::BinaryLiteral *Literal::impl_as() const { - return impl_as_BinaryLiteral(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::FixedSizeBinaryLiteral *Literal::impl_as() const { - return impl_as_FixedSizeBinaryLiteral(); -} - -struct LiteralBuilder { - typedef Literal Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_impl_type(org::apache::arrow::computeir::flatbuf::LiteralImpl impl_type) { - fbb_.AddElement(Literal::VT_IMPL_TYPE, static_cast(impl_type), 0); - } - void add_impl(flatbuffers::Offset impl) { - fbb_.AddOffset(Literal::VT_IMPL, impl); - } - void add_type(flatbuffers::Offset type) { - fbb_.AddOffset(Literal::VT_TYPE, type); - } - explicit LiteralBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - LiteralBuilder &operator=(const LiteralBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, Literal::VT_TYPE); - return o; - } -}; - -inline flatbuffers::Offset CreateLiteral( - flatbuffers::FlatBufferBuilder &_fbb, - org::apache::arrow::computeir::flatbuf::LiteralImpl impl_type = org::apache::arrow::computeir::flatbuf::LiteralImpl::NONE, - flatbuffers::Offset impl = 0, - flatbuffers::Offset type = 0) { - LiteralBuilder builder_(_fbb); - builder_.add_type(type); - builder_.add_impl(impl); - builder_.add_impl_type(impl_type); - return builder_.Finish(); -} - -inline bool VerifyIntervalLiteralImpl(flatbuffers::Verifier &verifier, const void *obj, IntervalLiteralImpl type) { - switch (type) { - case IntervalLiteralImpl::NONE: { - return true; - } - case IntervalLiteralImpl::IntervalLiteralMonths: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case IntervalLiteralImpl::IntervalLiteralDaysMilliseconds: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - default: return true; - } -} - -inline bool VerifyIntervalLiteralImplVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types) { - if (!values || !types) return !values && !types; - if (values->size() != types->size()) return false; - for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) { - if (!VerifyIntervalLiteralImpl( - verifier, values->Get(i), types->GetEnum(i))) { - return false; - } - } - return true; -} - -inline bool VerifyLiteralImpl(flatbuffers::Verifier &verifier, const void *obj, LiteralImpl type) { - switch (type) { - case LiteralImpl::NONE: { - return true; - } - case LiteralImpl::BooleanLiteral: { 
- auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::Int8Literal: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::Int16Literal: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::Int32Literal: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::Int64Literal: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::UInt8Literal: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::UInt16Literal: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::UInt32Literal: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::UInt64Literal: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::DateLiteral: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::TimeLiteral: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::TimestampLiteral: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::IntervalLiteral: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::DurationLiteral: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::DecimalLiteral: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::Float16Literal: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::Float32Literal: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::Float64Literal: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::ListLiteral: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::StructLiteral: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::MapLiteral: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::StringLiteral: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::BinaryLiteral: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case LiteralImpl::FixedSizeBinaryLiteral: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - default: return true; - } -} - -inline bool VerifyLiteralImplVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types) { - if (!values || !types) return !values && !types; - if (values->size() != types->size()) return false; - for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) { - if (!VerifyLiteralImpl( - verifier, values->Get(i), types->GetEnum(i))) { - return false; - } - } - return true; -} - -inline const org::apache::arrow::computeir::flatbuf::Literal *GetLiteral(const void *buf) { - return flatbuffers::GetRoot(buf); -} - -inline const org::apache::arrow::computeir::flatbuf::Literal *GetSizePrefixedLiteral(const void *buf) { - return flatbuffers::GetSizePrefixedRoot(buf); -} - -inline bool VerifyLiteralBuffer( - flatbuffers::Verifier &verifier) { - return verifier.VerifyBuffer(nullptr); -} - 
-inline bool VerifySizePrefixedLiteralBuffer( - flatbuffers::Verifier &verifier) { - return verifier.VerifySizePrefixedBuffer(nullptr); -} - -inline void FinishLiteralBuffer( - flatbuffers::FlatBufferBuilder &fbb, - flatbuffers::Offset root) { - fbb.Finish(root); -} - -inline void FinishSizePrefixedLiteralBuffer( - flatbuffers::FlatBufferBuilder &fbb, - flatbuffers::Offset root) { - fbb.FinishSizePrefixed(root); -} - -} // namespace flatbuf -} // namespace computeir -} // namespace arrow -} // namespace apache -} // namespace org - -#endif // FLATBUFFERS_GENERATED_LITERAL_ORG_APACHE_ARROW_COMPUTEIR_FLATBUF_H_ diff --git a/cpp/src/generated/Plan_generated.h b/cpp/src/generated/Plan_generated.h deleted file mode 100644 index 33f02af58a07f..0000000000000 --- a/cpp/src/generated/Plan_generated.h +++ /dev/null @@ -1,115 +0,0 @@ -// automatically generated by the FlatBuffers compiler, do not modify - - -#ifndef FLATBUFFERS_GENERATED_PLAN_ORG_APACHE_ARROW_COMPUTEIR_FLATBUF_H_ -#define FLATBUFFERS_GENERATED_PLAN_ORG_APACHE_ARROW_COMPUTEIR_FLATBUF_H_ - -#include "flatbuffers/flatbuffers.h" - -#include "Schema_generated.h" -#include "Expression_generated.h" -#include "Literal_generated.h" -#include "Relation_generated.h" - -namespace org { -namespace apache { -namespace arrow { -namespace computeir { -namespace flatbuf { - -struct Plan; -struct PlanBuilder; - -/// A specification of a query. -struct Plan FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef PlanBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_SINKS = 4 - }; - /// One or more output relations. - const flatbuffers::Vector> *sinks() const { - return GetPointer> *>(VT_SINKS); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_SINKS) && - verifier.VerifyVector(sinks()) && - verifier.VerifyVectorOfTables(sinks()) && - verifier.EndTable(); - } -}; - -struct PlanBuilder { - typedef Plan Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_sinks(flatbuffers::Offset>> sinks) { - fbb_.AddOffset(Plan::VT_SINKS, sinks); - } - explicit PlanBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - PlanBuilder &operator=(const PlanBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, Plan::VT_SINKS); - return o; - } -}; - -inline flatbuffers::Offset CreatePlan( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset>> sinks = 0) { - PlanBuilder builder_(_fbb); - builder_.add_sinks(sinks); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreatePlanDirect( - flatbuffers::FlatBufferBuilder &_fbb, - const std::vector> *sinks = nullptr) { - auto sinks__ = sinks ? 
_fbb.CreateVector>(*sinks) : 0; - return org::apache::arrow::computeir::flatbuf::CreatePlan( - _fbb, - sinks__); -} - -inline const org::apache::arrow::computeir::flatbuf::Plan *GetPlan(const void *buf) { - return flatbuffers::GetRoot(buf); -} - -inline const org::apache::arrow::computeir::flatbuf::Plan *GetSizePrefixedPlan(const void *buf) { - return flatbuffers::GetSizePrefixedRoot(buf); -} - -inline bool VerifyPlanBuffer( - flatbuffers::Verifier &verifier) { - return verifier.VerifyBuffer(nullptr); -} - -inline bool VerifySizePrefixedPlanBuffer( - flatbuffers::Verifier &verifier) { - return verifier.VerifySizePrefixedBuffer(nullptr); -} - -inline void FinishPlanBuffer( - flatbuffers::FlatBufferBuilder &fbb, - flatbuffers::Offset root) { - fbb.Finish(root); -} - -inline void FinishSizePrefixedPlanBuffer( - flatbuffers::FlatBufferBuilder &fbb, - flatbuffers::Offset root) { - fbb.FinishSizePrefixed(root); -} - -} // namespace flatbuf -} // namespace computeir -} // namespace arrow -} // namespace apache -} // namespace org - -#endif // FLATBUFFERS_GENERATED_PLAN_ORG_APACHE_ARROW_COMPUTEIR_FLATBUF_H_ diff --git a/cpp/src/generated/Relation_generated.h b/cpp/src/generated/Relation_generated.h deleted file mode 100644 index 110e632aa3f68..0000000000000 --- a/cpp/src/generated/Relation_generated.h +++ /dev/null @@ -1,1428 +0,0 @@ -// automatically generated by the FlatBuffers compiler, do not modify - - -#ifndef FLATBUFFERS_GENERATED_RELATION_ORG_APACHE_ARROW_COMPUTEIR_FLATBUF_H_ -#define FLATBUFFERS_GENERATED_RELATION_ORG_APACHE_ARROW_COMPUTEIR_FLATBUF_H_ - -#include "flatbuffers/flatbuffers.h" - -#include "Schema_generated.h" -#include "Expression_generated.h" -#include "Literal_generated.h" - -namespace org { -namespace apache { -namespace arrow { -namespace computeir { -namespace flatbuf { - -struct RelId; -struct RelIdBuilder; - -struct Filter; -struct FilterBuilder; - -struct Project; -struct ProjectBuilder; - -struct Grouping; -struct GroupingBuilder; - -struct Aggregate; -struct AggregateBuilder; - -struct Join; -struct JoinBuilder; - -struct OrderBy; -struct OrderByBuilder; - -struct Limit; -struct LimitBuilder; - -struct SetOperation; -struct SetOperationBuilder; - -struct LiteralColumn; -struct LiteralColumnBuilder; - -struct LiteralRelation; -struct LiteralRelationBuilder; - -struct Source; -struct SourceBuilder; - -struct Relation; -struct RelationBuilder; - -enum class JoinKind : uint8_t { - Anti = 0, - Cross = 1, - FullOuter = 2, - Inner = 3, - LeftOuter = 4, - LeftSemi = 5, - RightOuter = 6, - MIN = Anti, - MAX = RightOuter -}; - -inline const JoinKind (&EnumValuesJoinKind())[7] { - static const JoinKind values[] = { - JoinKind::Anti, - JoinKind::Cross, - JoinKind::FullOuter, - JoinKind::Inner, - JoinKind::LeftOuter, - JoinKind::LeftSemi, - JoinKind::RightOuter - }; - return values; -} - -inline const char * const *EnumNamesJoinKind() { - static const char * const names[8] = { - "Anti", - "Cross", - "FullOuter", - "Inner", - "LeftOuter", - "LeftSemi", - "RightOuter", - nullptr - }; - return names; -} - -inline const char *EnumNameJoinKind(JoinKind e) { - if (flatbuffers::IsOutRange(e, JoinKind::Anti, JoinKind::RightOuter)) return ""; - const size_t index = static_cast(e); - return EnumNamesJoinKind()[index]; -} - -/// The kind of set operation being performed. 
-enum class SetOpKind : uint8_t { - Union = 0, - Intersection = 1, - Difference = 2, - MIN = Union, - MAX = Difference -}; - -inline const SetOpKind (&EnumValuesSetOpKind())[3] { - static const SetOpKind values[] = { - SetOpKind::Union, - SetOpKind::Intersection, - SetOpKind::Difference - }; - return values; -} - -inline const char * const *EnumNamesSetOpKind() { - static const char * const names[4] = { - "Union", - "Intersection", - "Difference", - nullptr - }; - return names; -} - -inline const char *EnumNameSetOpKind(SetOpKind e) { - if (flatbuffers::IsOutRange(e, SetOpKind::Union, SetOpKind::Difference)) return ""; - const size_t index = static_cast(e); - return EnumNamesSetOpKind()[index]; -} - -/// The varieties of relations -enum class RelationImpl : uint8_t { - NONE = 0, - Aggregate = 1, - Filter = 2, - Join = 3, - Limit = 4, - LiteralRelation = 5, - OrderBy = 6, - Project = 7, - SetOperation = 8, - Source = 9, - MIN = NONE, - MAX = Source -}; - -inline const RelationImpl (&EnumValuesRelationImpl())[10] { - static const RelationImpl values[] = { - RelationImpl::NONE, - RelationImpl::Aggregate, - RelationImpl::Filter, - RelationImpl::Join, - RelationImpl::Limit, - RelationImpl::LiteralRelation, - RelationImpl::OrderBy, - RelationImpl::Project, - RelationImpl::SetOperation, - RelationImpl::Source - }; - return values; -} - -inline const char * const *EnumNamesRelationImpl() { - static const char * const names[11] = { - "NONE", - "Aggregate", - "Filter", - "Join", - "Limit", - "LiteralRelation", - "OrderBy", - "Project", - "SetOperation", - "Source", - nullptr - }; - return names; -} - -inline const char *EnumNameRelationImpl(RelationImpl e) { - if (flatbuffers::IsOutRange(e, RelationImpl::NONE, RelationImpl::Source)) return ""; - const size_t index = static_cast(e); - return EnumNamesRelationImpl()[index]; -} - -template struct RelationImplTraits { - static const RelationImpl enum_value = RelationImpl::NONE; -}; - -template<> struct RelationImplTraits { - static const RelationImpl enum_value = RelationImpl::Aggregate; -}; - -template<> struct RelationImplTraits { - static const RelationImpl enum_value = RelationImpl::Filter; -}; - -template<> struct RelationImplTraits { - static const RelationImpl enum_value = RelationImpl::Join; -}; - -template<> struct RelationImplTraits { - static const RelationImpl enum_value = RelationImpl::Limit; -}; - -template<> struct RelationImplTraits { - static const RelationImpl enum_value = RelationImpl::LiteralRelation; -}; - -template<> struct RelationImplTraits { - static const RelationImpl enum_value = RelationImpl::OrderBy; -}; - -template<> struct RelationImplTraits { - static const RelationImpl enum_value = RelationImpl::Project; -}; - -template<> struct RelationImplTraits { - static const RelationImpl enum_value = RelationImpl::SetOperation; -}; - -template<> struct RelationImplTraits { - static const RelationImpl enum_value = RelationImpl::Source; -}; - -bool VerifyRelationImpl(flatbuffers::Verifier &verifier, const void *obj, RelationImpl type); -bool VerifyRelationImplVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types); - -/// An identifier for relations in a query. -/// -/// A table is used here to allow plan implementations optionality. 
-struct RelId FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef RelIdBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_ID = 4 - }; - uint64_t id() const { - return GetField(VT_ID, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_ID) && - verifier.EndTable(); - } -}; - -struct RelIdBuilder { - typedef RelId Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_id(uint64_t id) { - fbb_.AddElement(RelId::VT_ID, id, 0); - } - explicit RelIdBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - RelIdBuilder &operator=(const RelIdBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateRelId( - flatbuffers::FlatBufferBuilder &_fbb, - uint64_t id = 0) { - RelIdBuilder builder_(_fbb); - builder_.add_id(id); - return builder_.Finish(); -} - -/// Filter operation -struct Filter FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef FilterBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_ID = 4, - VT_REL = 6, - VT_PREDICATE = 8 - }; - /// An identifiier for the relation. The identifier should be unique over the - /// entire plan. Optional. - const org::apache::arrow::computeir::flatbuf::RelId *id() const { - return GetPointer(VT_ID); - } - /// Child relation - const org::apache::arrow::computeir::flatbuf::Relation *rel() const { - return GetPointer(VT_REL); - } - /// The expression which will be evaluated against input rows - /// to determine whether they should be excluded from the - /// filter relation's output. 
- const org::apache::arrow::computeir::flatbuf::Expression *predicate() const { - return GetPointer(VT_PREDICATE); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffset(verifier, VT_ID) && - verifier.VerifyTable(id()) && - VerifyOffsetRequired(verifier, VT_REL) && - verifier.VerifyTable(rel()) && - VerifyOffsetRequired(verifier, VT_PREDICATE) && - verifier.VerifyTable(predicate()) && - verifier.EndTable(); - } -}; - -struct FilterBuilder { - typedef Filter Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_id(flatbuffers::Offset id) { - fbb_.AddOffset(Filter::VT_ID, id); - } - void add_rel(flatbuffers::Offset rel) { - fbb_.AddOffset(Filter::VT_REL, rel); - } - void add_predicate(flatbuffers::Offset predicate) { - fbb_.AddOffset(Filter::VT_PREDICATE, predicate); - } - explicit FilterBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - FilterBuilder &operator=(const FilterBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, Filter::VT_REL); - fbb_.Required(o, Filter::VT_PREDICATE); - return o; - } -}; - -inline flatbuffers::Offset CreateFilter( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset id = 0, - flatbuffers::Offset rel = 0, - flatbuffers::Offset predicate = 0) { - FilterBuilder builder_(_fbb); - builder_.add_predicate(predicate); - builder_.add_rel(rel); - builder_.add_id(id); - return builder_.Finish(); -} - -/// Projection -struct Project FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef ProjectBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_ID = 4, - VT_REL = 6, - VT_EXPRESSIONS = 8 - }; - /// An identifiier for the relation. The identifier should be unique over the - /// entire plan. Optional. - const org::apache::arrow::computeir::flatbuf::RelId *id() const { - return GetPointer(VT_ID); - } - /// Child relation - const org::apache::arrow::computeir::flatbuf::Relation *rel() const { - return GetPointer(VT_REL); - } - /// Expressions which will be evaluated to produce to - /// the rows of the project relation's output. 
- const flatbuffers::Vector> *expressions() const { - return GetPointer> *>(VT_EXPRESSIONS); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffset(verifier, VT_ID) && - verifier.VerifyTable(id()) && - VerifyOffsetRequired(verifier, VT_REL) && - verifier.VerifyTable(rel()) && - VerifyOffsetRequired(verifier, VT_EXPRESSIONS) && - verifier.VerifyVector(expressions()) && - verifier.VerifyVectorOfTables(expressions()) && - verifier.EndTable(); - } -}; - -struct ProjectBuilder { - typedef Project Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_id(flatbuffers::Offset id) { - fbb_.AddOffset(Project::VT_ID, id); - } - void add_rel(flatbuffers::Offset rel) { - fbb_.AddOffset(Project::VT_REL, rel); - } - void add_expressions(flatbuffers::Offset>> expressions) { - fbb_.AddOffset(Project::VT_EXPRESSIONS, expressions); - } - explicit ProjectBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - ProjectBuilder &operator=(const ProjectBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, Project::VT_REL); - fbb_.Required(o, Project::VT_EXPRESSIONS); - return o; - } -}; - -inline flatbuffers::Offset CreateProject( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset id = 0, - flatbuffers::Offset rel = 0, - flatbuffers::Offset>> expressions = 0) { - ProjectBuilder builder_(_fbb); - builder_.add_expressions(expressions); - builder_.add_rel(rel); - builder_.add_id(id); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreateProjectDirect( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset id = 0, - flatbuffers::Offset rel = 0, - const std::vector> *expressions = nullptr) { - auto expressions__ = expressions ? _fbb.CreateVector>(*expressions) : 0; - return org::apache::arrow::computeir::flatbuf::CreateProject( - _fbb, - id, - rel, - expressions__); -} - -/// A set of grouping keys -struct Grouping FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef GroupingBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_KEYS = 4 - }; - /// Expressions to group by - const flatbuffers::Vector> *keys() const { - return GetPointer> *>(VT_KEYS); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_KEYS) && - verifier.VerifyVector(keys()) && - verifier.VerifyVectorOfTables(keys()) && - verifier.EndTable(); - } -}; - -struct GroupingBuilder { - typedef Grouping Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_keys(flatbuffers::Offset>> keys) { - fbb_.AddOffset(Grouping::VT_KEYS, keys); - } - explicit GroupingBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - GroupingBuilder &operator=(const GroupingBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, Grouping::VT_KEYS); - return o; - } -}; - -inline flatbuffers::Offset CreateGrouping( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset>> keys = 0) { - GroupingBuilder builder_(_fbb); - builder_.add_keys(keys); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreateGroupingDirect( - flatbuffers::FlatBufferBuilder &_fbb, - const std::vector> *keys = nullptr) { - auto keys__ = keys ? 
_fbb.CreateVector>(*keys) : 0; - return org::apache::arrow::computeir::flatbuf::CreateGrouping( - _fbb, - keys__); -} - -/// Aggregate operation -struct Aggregate FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef AggregateBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_ID = 4, - VT_REL = 6, - VT_MEASURES = 8, - VT_GROUPINGS = 10 - }; - /// An identifiier for the relation. The identifier should be unique over the - /// entire plan. Optional. - const org::apache::arrow::computeir::flatbuf::RelId *id() const { - return GetPointer(VT_ID); - } - /// Child relation - const org::apache::arrow::computeir::flatbuf::Relation *rel() const { - return GetPointer(VT_REL); - } - /// Expressions which will be evaluated to produce to - /// the rows of the aggregate relation's output. - const flatbuffers::Vector> *measures() const { - return GetPointer> *>(VT_MEASURES); - } - /// Keys by which `aggregations` will be grouped. - /// - /// The nested list here is to support grouping sets - /// eg - /// - /// SELECT a, b, c, sum(d) - /// FROM t - /// GROUP BY - /// GROUPING SETS ( - /// (a, b, c), - /// (a, b), - /// (a), - /// () - /// ); - const flatbuffers::Vector> *groupings() const { - return GetPointer> *>(VT_GROUPINGS); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffset(verifier, VT_ID) && - verifier.VerifyTable(id()) && - VerifyOffsetRequired(verifier, VT_REL) && - verifier.VerifyTable(rel()) && - VerifyOffsetRequired(verifier, VT_MEASURES) && - verifier.VerifyVector(measures()) && - verifier.VerifyVectorOfTables(measures()) && - VerifyOffsetRequired(verifier, VT_GROUPINGS) && - verifier.VerifyVector(groupings()) && - verifier.VerifyVectorOfTables(groupings()) && - verifier.EndTable(); - } -}; - -struct AggregateBuilder { - typedef Aggregate Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_id(flatbuffers::Offset id) { - fbb_.AddOffset(Aggregate::VT_ID, id); - } - void add_rel(flatbuffers::Offset rel) { - fbb_.AddOffset(Aggregate::VT_REL, rel); - } - void add_measures(flatbuffers::Offset>> measures) { - fbb_.AddOffset(Aggregate::VT_MEASURES, measures); - } - void add_groupings(flatbuffers::Offset>> groupings) { - fbb_.AddOffset(Aggregate::VT_GROUPINGS, groupings); - } - explicit AggregateBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - AggregateBuilder &operator=(const AggregateBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, Aggregate::VT_REL); - fbb_.Required(o, Aggregate::VT_MEASURES); - fbb_.Required(o, Aggregate::VT_GROUPINGS); - return o; - } -}; - -inline flatbuffers::Offset CreateAggregate( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset id = 0, - flatbuffers::Offset rel = 0, - flatbuffers::Offset>> measures = 0, - flatbuffers::Offset>> groupings = 0) { - AggregateBuilder builder_(_fbb); - builder_.add_groupings(groupings); - builder_.add_measures(measures); - builder_.add_rel(rel); - builder_.add_id(id); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreateAggregateDirect( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset id = 0, - flatbuffers::Offset rel = 0, - const std::vector> *measures = nullptr, - const std::vector> *groupings = nullptr) { - auto measures__ = measures ? _fbb.CreateVector>(*measures) : 0; - auto groupings__ = groupings ? 
_fbb.CreateVector>(*groupings) : 0; - return org::apache::arrow::computeir::flatbuf::CreateAggregate( - _fbb, - id, - rel, - measures__, - groupings__); -} - -/// Join between two tables -struct Join FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef JoinBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_ID = 4, - VT_LEFT = 6, - VT_RIGHT = 8, - VT_ON_EXPRESSION = 10, - VT_JOIN_KIND = 12 - }; - /// An identifiier for the relation. The identifier should be unique over the - /// entire plan. Optional. - const org::apache::arrow::computeir::flatbuf::RelId *id() const { - return GetPointer(VT_ID); - } - /// Left relation - const org::apache::arrow::computeir::flatbuf::Relation *left() const { - return GetPointer(VT_LEFT); - } - /// Right relation - const org::apache::arrow::computeir::flatbuf::Relation *right() const { - return GetPointer(VT_RIGHT); - } - /// The expression which will be evaluated against rows from each - /// input to determine whether they should be included in the - /// join relation's output. - const org::apache::arrow::computeir::flatbuf::Expression *on_expression() const { - return GetPointer(VT_ON_EXPRESSION); - } - /// The kind of join to use. - org::apache::arrow::computeir::flatbuf::JoinKind join_kind() const { - return static_cast(GetField(VT_JOIN_KIND, 0)); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffset(verifier, VT_ID) && - verifier.VerifyTable(id()) && - VerifyOffsetRequired(verifier, VT_LEFT) && - verifier.VerifyTable(left()) && - VerifyOffsetRequired(verifier, VT_RIGHT) && - verifier.VerifyTable(right()) && - VerifyOffsetRequired(verifier, VT_ON_EXPRESSION) && - verifier.VerifyTable(on_expression()) && - VerifyField(verifier, VT_JOIN_KIND) && - verifier.EndTable(); - } -}; - -struct JoinBuilder { - typedef Join Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_id(flatbuffers::Offset id) { - fbb_.AddOffset(Join::VT_ID, id); - } - void add_left(flatbuffers::Offset left) { - fbb_.AddOffset(Join::VT_LEFT, left); - } - void add_right(flatbuffers::Offset right) { - fbb_.AddOffset(Join::VT_RIGHT, right); - } - void add_on_expression(flatbuffers::Offset on_expression) { - fbb_.AddOffset(Join::VT_ON_EXPRESSION, on_expression); - } - void add_join_kind(org::apache::arrow::computeir::flatbuf::JoinKind join_kind) { - fbb_.AddElement(Join::VT_JOIN_KIND, static_cast(join_kind), 0); - } - explicit JoinBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - JoinBuilder &operator=(const JoinBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, Join::VT_LEFT); - fbb_.Required(o, Join::VT_RIGHT); - fbb_.Required(o, Join::VT_ON_EXPRESSION); - return o; - } -}; - -inline flatbuffers::Offset CreateJoin( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset id = 0, - flatbuffers::Offset left = 0, - flatbuffers::Offset right = 0, - flatbuffers::Offset on_expression = 0, - org::apache::arrow::computeir::flatbuf::JoinKind join_kind = org::apache::arrow::computeir::flatbuf::JoinKind::Anti) { - JoinBuilder builder_(_fbb); - builder_.add_on_expression(on_expression); - builder_.add_right(right); - builder_.add_left(left); - builder_.add_id(id); - builder_.add_join_kind(join_kind); - return builder_.Finish(); -} - -/// Order by relation -struct OrderBy FLATBUFFERS_FINAL_CLASS : private 
flatbuffers::Table { - typedef OrderByBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_ID = 4, - VT_REL = 6, - VT_KEYS = 8 - }; - /// An identifiier for the relation. The identifier should be unique over the - /// entire plan. Optional. - const org::apache::arrow::computeir::flatbuf::RelId *id() const { - return GetPointer(VT_ID); - } - /// Child relation - const org::apache::arrow::computeir::flatbuf::Relation *rel() const { - return GetPointer(VT_REL); - } - /// Define sort order for rows of output. - /// Keys with higher precedence are ordered ahead of other keys. - const flatbuffers::Vector> *keys() const { - return GetPointer> *>(VT_KEYS); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffset(verifier, VT_ID) && - verifier.VerifyTable(id()) && - VerifyOffsetRequired(verifier, VT_REL) && - verifier.VerifyTable(rel()) && - VerifyOffsetRequired(verifier, VT_KEYS) && - verifier.VerifyVector(keys()) && - verifier.VerifyVectorOfTables(keys()) && - verifier.EndTable(); - } -}; - -struct OrderByBuilder { - typedef OrderBy Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_id(flatbuffers::Offset id) { - fbb_.AddOffset(OrderBy::VT_ID, id); - } - void add_rel(flatbuffers::Offset rel) { - fbb_.AddOffset(OrderBy::VT_REL, rel); - } - void add_keys(flatbuffers::Offset>> keys) { - fbb_.AddOffset(OrderBy::VT_KEYS, keys); - } - explicit OrderByBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - OrderByBuilder &operator=(const OrderByBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, OrderBy::VT_REL); - fbb_.Required(o, OrderBy::VT_KEYS); - return o; - } -}; - -inline flatbuffers::Offset CreateOrderBy( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset id = 0, - flatbuffers::Offset rel = 0, - flatbuffers::Offset>> keys = 0) { - OrderByBuilder builder_(_fbb); - builder_.add_keys(keys); - builder_.add_rel(rel); - builder_.add_id(id); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreateOrderByDirect( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset id = 0, - flatbuffers::Offset rel = 0, - const std::vector> *keys = nullptr) { - auto keys__ = keys ? _fbb.CreateVector>(*keys) : 0; - return org::apache::arrow::computeir::flatbuf::CreateOrderBy( - _fbb, - id, - rel, - keys__); -} - -/// Limit operation -struct Limit FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef LimitBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_ID = 4, - VT_REL = 6, - VT_OFFSET = 8, - VT_COUNT = 10 - }; - /// An identifiier for the relation. The identifier should be unique over the - /// entire plan. Optional. - const org::apache::arrow::computeir::flatbuf::RelId *id() const { - return GetPointer(VT_ID); - } - /// Child relation - const org::apache::arrow::computeir::flatbuf::Relation *rel() const { - return GetPointer(VT_REL); - } - /// Starting index of rows - uint32_t offset() const { - return GetField(VT_OFFSET, 0); - } - /// The maximum number of rows of output. 
- uint32_t count() const { - return GetField(VT_COUNT, 0); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffset(verifier, VT_ID) && - verifier.VerifyTable(id()) && - VerifyOffsetRequired(verifier, VT_REL) && - verifier.VerifyTable(rel()) && - VerifyField(verifier, VT_OFFSET) && - VerifyField(verifier, VT_COUNT) && - verifier.EndTable(); - } -}; - -struct LimitBuilder { - typedef Limit Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_id(flatbuffers::Offset id) { - fbb_.AddOffset(Limit::VT_ID, id); - } - void add_rel(flatbuffers::Offset rel) { - fbb_.AddOffset(Limit::VT_REL, rel); - } - void add_offset(uint32_t offset) { - fbb_.AddElement(Limit::VT_OFFSET, offset, 0); - } - void add_count(uint32_t count) { - fbb_.AddElement(Limit::VT_COUNT, count, 0); - } - explicit LimitBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - LimitBuilder &operator=(const LimitBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, Limit::VT_REL); - return o; - } -}; - -inline flatbuffers::Offset CreateLimit( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset id = 0, - flatbuffers::Offset rel = 0, - uint32_t offset = 0, - uint32_t count = 0) { - LimitBuilder builder_(_fbb); - builder_.add_count(count); - builder_.add_offset(offset); - builder_.add_rel(rel); - builder_.add_id(id); - return builder_.Finish(); -} - -/// A set operation on two or more relations -struct SetOperation FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef SetOperationBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_ID = 4, - VT_RELS = 6, - VT_SET_OP = 8 - }; - /// An identifiier for the relation. The identifier should be unique over the - /// entire plan. Optional. 
- const org::apache::arrow::computeir::flatbuf::RelId *id() const { - return GetPointer(VT_ID); - } - /// Child relations - const flatbuffers::Vector> *rels() const { - return GetPointer> *>(VT_RELS); - } - /// The kind of set operation - org::apache::arrow::computeir::flatbuf::SetOpKind set_op() const { - return static_cast(GetField(VT_SET_OP, 0)); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffset(verifier, VT_ID) && - verifier.VerifyTable(id()) && - VerifyOffsetRequired(verifier, VT_RELS) && - verifier.VerifyVector(rels()) && - verifier.VerifyVectorOfTables(rels()) && - VerifyField(verifier, VT_SET_OP) && - verifier.EndTable(); - } -}; - -struct SetOperationBuilder { - typedef SetOperation Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_id(flatbuffers::Offset id) { - fbb_.AddOffset(SetOperation::VT_ID, id); - } - void add_rels(flatbuffers::Offset>> rels) { - fbb_.AddOffset(SetOperation::VT_RELS, rels); - } - void add_set_op(org::apache::arrow::computeir::flatbuf::SetOpKind set_op) { - fbb_.AddElement(SetOperation::VT_SET_OP, static_cast(set_op), 0); - } - explicit SetOperationBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - SetOperationBuilder &operator=(const SetOperationBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, SetOperation::VT_RELS); - return o; - } -}; - -inline flatbuffers::Offset CreateSetOperation( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset id = 0, - flatbuffers::Offset>> rels = 0, - org::apache::arrow::computeir::flatbuf::SetOpKind set_op = org::apache::arrow::computeir::flatbuf::SetOpKind::Union) { - SetOperationBuilder builder_(_fbb); - builder_.add_rels(rels); - builder_.add_id(id); - builder_.add_set_op(set_op); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreateSetOperationDirect( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset id = 0, - const std::vector> *rels = nullptr, - org::apache::arrow::computeir::flatbuf::SetOpKind set_op = org::apache::arrow::computeir::flatbuf::SetOpKind::Union) { - auto rels__ = rels ? _fbb.CreateVector>(*rels) : 0; - return org::apache::arrow::computeir::flatbuf::CreateSetOperation( - _fbb, - id, - rels__, - set_op); -} - -/// A single column of literal values. 
-struct LiteralColumn FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef LiteralColumnBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_ELEMENTS = 4 - }; - /// The literal values of the column - const flatbuffers::Vector> *elements() const { - return GetPointer> *>(VT_ELEMENTS); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_ELEMENTS) && - verifier.VerifyVector(elements()) && - verifier.VerifyVectorOfTables(elements()) && - verifier.EndTable(); - } -}; - -struct LiteralColumnBuilder { - typedef LiteralColumn Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_elements(flatbuffers::Offset>> elements) { - fbb_.AddOffset(LiteralColumn::VT_ELEMENTS, elements); - } - explicit LiteralColumnBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - LiteralColumnBuilder &operator=(const LiteralColumnBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, LiteralColumn::VT_ELEMENTS); - return o; - } -}; - -inline flatbuffers::Offset CreateLiteralColumn( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset>> elements = 0) { - LiteralColumnBuilder builder_(_fbb); - builder_.add_elements(elements); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreateLiteralColumnDirect( - flatbuffers::FlatBufferBuilder &_fbb, - const std::vector> *elements = nullptr) { - auto elements__ = elements ? _fbb.CreateVector>(*elements) : 0; - return org::apache::arrow::computeir::flatbuf::CreateLiteralColumn( - _fbb, - elements__); -} - -/// Literal relation -struct LiteralRelation FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef LiteralRelationBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_ID = 4, - VT_COLUMNS = 6 - }; - /// An identifiier for the relation. The identifier should be unique over the - /// entire plan. Optional. - const org::apache::arrow::computeir::flatbuf::RelId *id() const { - return GetPointer(VT_ID); - } - /// The columns of this literal relation. 
- const flatbuffers::Vector> *columns() const { - return GetPointer> *>(VT_COLUMNS); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffset(verifier, VT_ID) && - verifier.VerifyTable(id()) && - VerifyOffsetRequired(verifier, VT_COLUMNS) && - verifier.VerifyVector(columns()) && - verifier.VerifyVectorOfTables(columns()) && - verifier.EndTable(); - } -}; - -struct LiteralRelationBuilder { - typedef LiteralRelation Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_id(flatbuffers::Offset id) { - fbb_.AddOffset(LiteralRelation::VT_ID, id); - } - void add_columns(flatbuffers::Offset>> columns) { - fbb_.AddOffset(LiteralRelation::VT_COLUMNS, columns); - } - explicit LiteralRelationBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - LiteralRelationBuilder &operator=(const LiteralRelationBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, LiteralRelation::VT_COLUMNS); - return o; - } -}; - -inline flatbuffers::Offset CreateLiteralRelation( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset id = 0, - flatbuffers::Offset>> columns = 0) { - LiteralRelationBuilder builder_(_fbb); - builder_.add_columns(columns); - builder_.add_id(id); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreateLiteralRelationDirect( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset id = 0, - const std::vector> *columns = nullptr) { - auto columns__ = columns ? _fbb.CreateVector>(*columns) : 0; - return org::apache::arrow::computeir::flatbuf::CreateLiteralRelation( - _fbb, - id, - columns__); -} - -/// An external source of tabular data -struct Source FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef SourceBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_ID = 4, - VT_NAME = 6, - VT_FILTER = 8, - VT_SCHEMA = 10, - VT_PROJECTION = 12 - }; - /// An identifiier for the relation. The identifier should be unique over the - /// entire plan. Optional. - const org::apache::arrow::computeir::flatbuf::RelId *id() const { - return GetPointer(VT_ID); - } - const flatbuffers::String *name() const { - return GetPointer(VT_NAME); - } - /// An optional expression used to filter out rows directly from the source. - /// - /// Useful for consumers that implement predicate pushdown. - /// - /// A missing filter value indicates no filter, i.e., all rows are - /// returned from the source. - const org::apache::arrow::computeir::flatbuf::Expression *filter() const { - return GetPointer(VT_FILTER); - } - /// Schemas are explicitly optional - const org::apache::arrow::flatbuf::Schema *schema() const { - return GetPointer(VT_SCHEMA); - } - /// An optional list of field indices indicating which columns should be read - /// from the source. Columns excluded from this listing will instead be replaced - /// with all-null placeholders to guarantee that the schema of the source is - /// unaffected by this projection. - /// - /// A missing value indicates all columns should be read. - /// - /// The behavior of an empty list is undefined. 
- const flatbuffers::Vector> *projection() const { - return GetPointer> *>(VT_PROJECTION); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffset(verifier, VT_ID) && - verifier.VerifyTable(id()) && - VerifyOffsetRequired(verifier, VT_NAME) && - verifier.VerifyString(name()) && - VerifyOffset(verifier, VT_FILTER) && - verifier.VerifyTable(filter()) && - VerifyOffset(verifier, VT_SCHEMA) && - verifier.VerifyTable(schema()) && - VerifyOffset(verifier, VT_PROJECTION) && - verifier.VerifyVector(projection()) && - verifier.VerifyVectorOfTables(projection()) && - verifier.EndTable(); - } -}; - -struct SourceBuilder { - typedef Source Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_id(flatbuffers::Offset id) { - fbb_.AddOffset(Source::VT_ID, id); - } - void add_name(flatbuffers::Offset name) { - fbb_.AddOffset(Source::VT_NAME, name); - } - void add_filter(flatbuffers::Offset filter) { - fbb_.AddOffset(Source::VT_FILTER, filter); - } - void add_schema(flatbuffers::Offset schema) { - fbb_.AddOffset(Source::VT_SCHEMA, schema); - } - void add_projection(flatbuffers::Offset>> projection) { - fbb_.AddOffset(Source::VT_PROJECTION, projection); - } - explicit SourceBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - SourceBuilder &operator=(const SourceBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, Source::VT_NAME); - return o; - } -}; - -inline flatbuffers::Offset CreateSource( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset id = 0, - flatbuffers::Offset name = 0, - flatbuffers::Offset filter = 0, - flatbuffers::Offset schema = 0, - flatbuffers::Offset>> projection = 0) { - SourceBuilder builder_(_fbb); - builder_.add_projection(projection); - builder_.add_schema(schema); - builder_.add_filter(filter); - builder_.add_name(name); - builder_.add_id(id); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreateSourceDirect( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset id = 0, - const char *name = nullptr, - flatbuffers::Offset filter = 0, - flatbuffers::Offset schema = 0, - const std::vector> *projection = nullptr) { - auto name__ = name ? _fbb.CreateString(name) : 0; - auto projection__ = projection ? _fbb.CreateVector>(*projection) : 0; - return org::apache::arrow::computeir::flatbuf::CreateSource( - _fbb, - id, - name__, - filter, - schema, - projection__); -} - -/// A table holding an instance of the possible relation types. -struct Relation FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef RelationBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_IMPL_TYPE = 4, - VT_IMPL = 6 - }; - org::apache::arrow::computeir::flatbuf::RelationImpl impl_type() const { - return static_cast(GetField(VT_IMPL_TYPE, 0)); - } - const void *impl() const { - return GetPointer(VT_IMPL); - } - template const T *impl_as() const; - const org::apache::arrow::computeir::flatbuf::Aggregate *impl_as_Aggregate() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::RelationImpl::Aggregate ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::Filter *impl_as_Filter() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::RelationImpl::Filter ? 
static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::Join *impl_as_Join() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::RelationImpl::Join ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::Limit *impl_as_Limit() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::RelationImpl::Limit ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::LiteralRelation *impl_as_LiteralRelation() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::RelationImpl::LiteralRelation ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::OrderBy *impl_as_OrderBy() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::RelationImpl::OrderBy ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::Project *impl_as_Project() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::RelationImpl::Project ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::SetOperation *impl_as_SetOperation() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::RelationImpl::SetOperation ? static_cast(impl()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::Source *impl_as_Source() const { - return impl_type() == org::apache::arrow::computeir::flatbuf::RelationImpl::Source ? static_cast(impl()) : nullptr; - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_IMPL_TYPE) && - VerifyOffsetRequired(verifier, VT_IMPL) && - VerifyRelationImpl(verifier, impl(), impl_type()) && - verifier.EndTable(); - } -}; - -template<> inline const org::apache::arrow::computeir::flatbuf::Aggregate *Relation::impl_as() const { - return impl_as_Aggregate(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::Filter *Relation::impl_as() const { - return impl_as_Filter(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::Join *Relation::impl_as() const { - return impl_as_Join(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::Limit *Relation::impl_as() const { - return impl_as_Limit(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::LiteralRelation *Relation::impl_as() const { - return impl_as_LiteralRelation(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::OrderBy *Relation::impl_as() const { - return impl_as_OrderBy(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::Project *Relation::impl_as() const { - return impl_as_Project(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::SetOperation *Relation::impl_as() const { - return impl_as_SetOperation(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::Source *Relation::impl_as() const { - return impl_as_Source(); -} - -struct RelationBuilder { - typedef Relation Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_impl_type(org::apache::arrow::computeir::flatbuf::RelationImpl impl_type) { - fbb_.AddElement(Relation::VT_IMPL_TYPE, static_cast(impl_type), 0); - } - void add_impl(flatbuffers::Offset impl) { - fbb_.AddOffset(Relation::VT_IMPL, impl); - } - explicit RelationBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - RelationBuilder &operator=(const RelationBuilder &); - 
flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, Relation::VT_IMPL); - return o; - } -}; - -inline flatbuffers::Offset CreateRelation( - flatbuffers::FlatBufferBuilder &_fbb, - org::apache::arrow::computeir::flatbuf::RelationImpl impl_type = org::apache::arrow::computeir::flatbuf::RelationImpl::NONE, - flatbuffers::Offset impl = 0) { - RelationBuilder builder_(_fbb); - builder_.add_impl(impl); - builder_.add_impl_type(impl_type); - return builder_.Finish(); -} - -inline bool VerifyRelationImpl(flatbuffers::Verifier &verifier, const void *obj, RelationImpl type) { - switch (type) { - case RelationImpl::NONE: { - return true; - } - case RelationImpl::Aggregate: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case RelationImpl::Filter: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case RelationImpl::Join: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case RelationImpl::Limit: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case RelationImpl::LiteralRelation: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case RelationImpl::OrderBy: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case RelationImpl::Project: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case RelationImpl::SetOperation: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case RelationImpl::Source: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - default: return true; - } -} - -inline bool VerifyRelationImplVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types) { - if (!values || !types) return !values && !types; - if (values->size() != types->size()) return false; - for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) { - if (!VerifyRelationImpl( - verifier, values->Get(i), types->GetEnum(i))) { - return false; - } - } - return true; -} - -inline const org::apache::arrow::computeir::flatbuf::Relation *GetRelation(const void *buf) { - return flatbuffers::GetRoot(buf); -} - -inline const org::apache::arrow::computeir::flatbuf::Relation *GetSizePrefixedRelation(const void *buf) { - return flatbuffers::GetSizePrefixedRoot(buf); -} - -inline bool VerifyRelationBuffer( - flatbuffers::Verifier &verifier) { - return verifier.VerifyBuffer(nullptr); -} - -inline bool VerifySizePrefixedRelationBuffer( - flatbuffers::Verifier &verifier) { - return verifier.VerifySizePrefixedBuffer(nullptr); -} - -inline void FinishRelationBuffer( - flatbuffers::FlatBufferBuilder &fbb, - flatbuffers::Offset root) { - fbb.Finish(root); -} - -inline void FinishSizePrefixedRelationBuffer( - flatbuffers::FlatBufferBuilder &fbb, - flatbuffers::Offset root) { - fbb.FinishSizePrefixed(root); -} - -} // namespace flatbuf -} // namespace computeir -} // namespace arrow -} // namespace apache -} // namespace org - -#endif // FLATBUFFERS_GENERATED_RELATION_ORG_APACHE_ARROW_COMPUTEIR_FLATBUF_H_ diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index 542fa5bc083bc..caed261734280 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -106,9 +106,11 @@ endfunction() if(ARROW_BUILD_STATIC) set(PARQUET_STATIC_LINK_LIBS arrow_static ${ARROW_STATIC_LINK_LIBS}) + 
set(PARQUET_STATIC_INTERFACE_INSTALL_LIBS arrow_static) set(ARROW_LIBRARIES_FOR_STATIC_TESTS arrow_testing_static arrow_static ${ARROW_STATIC_LINK_LIBS}) else() + set(PARQUET_STATIC_INTERFACE_INSTALL_LIBS) set(ARROW_LIBRARIES_FOR_STATIC_TESTS arrow_testing_shared arrow_shared) endif() @@ -218,8 +220,9 @@ if(NOT PARQUET_MINIMAL_DEPENDENCY) # Link publicly with parquet_static (because internal users need to # transitively link all dependencies) - set(PARQUET_STATIC_LINK_LIBS ${PARQUET_STATIC_LINK_LIBS} thrift::thrift) -endif(NOT PARQUET_MINIMAL_DEPENDENCY) + list(APPEND PARQUET_STATIC_LINK_LIBS thrift::thrift) + list(APPEND PARQUET_STATIC_INTERFACE_INSTALL_LIBS thrift::thrift) +endif() if(CXX_LINKER_SUPPORTS_VERSION_SCRIPT) set(PARQUET_SHARED_LINK_FLAGS @@ -243,8 +246,12 @@ add_arrow_lib(parquet ${PARQUET_SHARED_LINK_LIBS} SHARED_PRIVATE_LINK_LIBS ${PARQUET_SHARED_PRIVATE_LINK_LIBS} + SHARED_INSTALL_INTERFACE_LIBS + arrow_shared STATIC_LINK_LIBS - ${PARQUET_STATIC_LINK_LIBS}) + ${PARQUET_STATIC_LINK_LIBS} + STATIC_INSTALL_INTERFACE_LIBS + ${PARQUET_STATIC_INTERFACE_INSTALL_LIBS}) if(WIN32 AND NOT (ARROW_TEST_LINKAGE STREQUAL "static")) add_library(parquet_test_support STATIC diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index 523030fd783d9..c5899a47e9516 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -752,8 +752,11 @@ class ColumnReaderImplBase { repetition_level_decoder_.SetDataV2(page.repetition_levels_byte_length(), max_rep_level_, static_cast(num_buffered_values_), buffer); - buffer += page.repetition_levels_byte_length(); } + // ARROW-17453: Even if max_rep_level_ is 0, there may still be + // repetition level bytes written and/or reported in the header by + // some writers (e.g. Athena) + buffer += page.repetition_levels_byte_length(); if (max_def_level_ > 0) { definition_level_decoder_.SetDataV2(page.definition_levels_byte_length(), diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index eddfdfb04e83f..9fc034f41fd80 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -356,6 +356,39 @@ TEST_F(TestPrimitiveReader, TestReadValuesMissing) { ParquetException); } +// Repetition level byte length reported in Page but Max Repetition level +// is zero for the column. +TEST_F(TestPrimitiveReader, TestRepetitionLvlBytesWithMaxRepetitionZero) { + constexpr int batch_size = 4; + max_def_level_ = 1; + max_rep_level_ = 0; + NodePtr type = schema::Int32("a", Repetition::OPTIONAL); + const ColumnDescriptor descr(type, max_def_level_, max_rep_level_); + // Bytes here came from the example parquet file in ARROW-17453's int32 + // column which was delta bit-packed. The key part is the first three + // bytes: the page header reports 1 byte for repetition levels even + // though the max rep level is 0. If that byte isn't skipped then + // we get def levels of [1, 1, 0, 0] instead of the correct [1, 1, 1, 0]. 
+ const std::vector<uint8_t> page_data{0x3, 0x3, 0x7, 0x80, 0x1, 0x4, 0x3, + 0x18, 0x1, 0x2, 0x0, 0x0, 0x0, 0xc, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; + + std::shared_ptr<DataPageV2> data_page = + std::make_shared<DataPageV2>(Buffer::Wrap(page_data.data(), page_data.size()), 4, 1, + 4, Encoding::DELTA_BINARY_PACKED, 2, 1, 21); + + pages_.push_back(data_page); + InitReader(&descr); + auto reader = static_cast<Int32Reader*>(reader_.get()); + int16_t def_levels_out[batch_size]; + int32_t values[batch_size]; + int64_t values_read; + ASSERT_TRUE(reader->HasNext()); + EXPECT_EQ(4, reader->ReadBatch(batch_size, def_levels_out, /*replevels=*/nullptr, + values, &values_read)); + EXPECT_EQ(3, values_read); +} + // Page claims to have two values but only 1 is present. TEST_F(TestPrimitiveReader, TestReadValuesMissingWithDictionary) { constexpr int batch_size = 1; diff --git a/cpp/src/parquet/properties.cc b/cpp/src/parquet/properties.cc index 93638dbe28a92..b8e529896bd54 100644 --- a/cpp/src/parquet/properties.cc +++ b/cpp/src/parquet/properties.cc @@ -31,8 +31,9 @@ std::shared_ptr<ArrowInputStream> ReaderProperties::GetStream( if (buffered_stream_enabled_) { // ARROW-6180 / PARQUET-1636 Create isolated reader that references segment // of source - std::shared_ptr<::arrow::io::InputStream> safe_stream = - ::arrow::io::RandomAccessFile::GetStream(source, start, num_bytes); + PARQUET_ASSIGN_OR_THROW( + std::shared_ptr<::arrow::io::InputStream> safe_stream, + ::arrow::io::RandomAccessFile::GetStream(source, start, num_bytes)); PARQUET_ASSIGN_OR_THROW( auto stream, ::arrow::io::BufferedInputStream::Create(buffer_size_, pool_, safe_stream, num_bytes)); diff --git a/cpp/src/parquet/schema.h b/cpp/src/parquet/schema.h index 9e06040226b80..1c440b1133ef7 100644 --- a/cpp/src/parquet/schema.h +++ b/cpp/src/parquet/schema.h @@ -285,7 +285,7 @@ class PARQUET_EXPORT GroupNode : public Node { bool Equals(const Node* other) const override; - NodePtr field(int i) const { return fields_[i]; } + const NodePtr& field(int i) const { return fields_[i]; } // Get the index of a field by its name, or negative value if not found. // If several fields share the same name, it is unspecified which one // is returned.
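For context on the ARROW-17453 change to column_reader.cc and the new TestRepetitionLvlBytesWithMaxRepetitionZero test above: in a V2 data page the repetition-level bytes, definition-level bytes, and encoded values are stored back to back, and their lengths come from the page header rather than from the column's maximum levels. The sketch below is illustrative only — PageV2View, LevelSections, and SplitPageV2 are hypothetical names, not parquet-cpp API — but it shows why the read cursor must advance past repetition_levels_byte_length() even when max_rep_level_ is 0; skipping that step starts the definition-level decoder one byte early, which is the [1, 1, 0, 0] vs. [1, 1, 1, 0] mismatch the test guards against.

```cpp
#include <cstdint>

// Hypothetical, simplified view of a V2 data page body; the two length fields
// mirror the page-header fields referenced in the patch above.
struct PageV2View {
  const uint8_t* body;                    // start of the page payload
  int32_t repetition_levels_byte_length;  // as reported by the writer
  int32_t definition_levels_byte_length;  // as reported by the writer
};

struct LevelSections {
  const uint8_t* rep_levels;  // may be non-empty even if max_rep_level == 0
  const uint8_t* def_levels;
  const uint8_t* values;      // start of the encoded values
};

// Splits the page body using only the header-reported lengths. Whether the
// repetition levels are *decoded* depends on the column's max repetition
// level, but the bytes are always *skipped* before the definition levels.
inline LevelSections SplitPageV2(const PageV2View& page) {
  const uint8_t* cursor = page.body;
  LevelSections out;
  out.rep_levels = cursor;
  cursor += page.repetition_levels_byte_length;  // ARROW-17453: unconditional
  out.def_levels = cursor;
  cursor += page.definition_levels_byte_length;
  out.values = cursor;
  return out;
}
```

The added test exercises exactly this path: its page header reports one repetition-level byte for a column whose maximum repetition level is zero.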
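Similarly, the schema.h hunk above changes GroupNode::field() to return the stored NodePtr (a std::shared_ptr) by const reference instead of by value, so callers that merely inspect a field no longer pay for a shared_ptr copy and its atomic reference-count update on every call. A minimal sketch of the difference, using the hypothetical class name GroupLike alongside the real NodePtr alias:

```cpp
#include <memory>
#include <vector>

struct Node {};
using NodePtr = std::shared_ptr<Node>;

class GroupLike {
 public:
  // By value: every call copies the shared_ptr, i.e. an atomic ref-count bump.
  NodePtr field_by_value(int i) const { return fields_[i]; }

  // By const reference: no copy; callers that need the child to outlive the
  // parent can still copy the returned reference into their own NodePtr.
  const NodePtr& field_by_ref(int i) const { return fields_[i]; }

 private:
  std::vector<NodePtr> fields_;
};
```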
diff --git a/cpp/src/plasma/CMakeLists.txt b/cpp/src/plasma/CMakeLists.txt index d78a5ccfdccbe..2573693738590 100644 --- a/cpp/src/plasma/CMakeLists.txt +++ b/cpp/src/plasma/CMakeLists.txt @@ -185,14 +185,14 @@ if(ARROW_PLASMA_JAVA_CLIENT) if(APPLE) target_link_libraries(plasma_java - plasma_shared - ${PLASMA_LINK_LIBS} + plasma_static + ${PLASMA_STATIC_LINK_LIBS} "-undefined dynamic_lookup" ${PTHREAD_LIBRARY}) - else(APPLE) - target_link_libraries(plasma_java plasma_shared ${PLASMA_LINK_LIBS} + else() + target_link_libraries(plasma_java plasma_static ${PLASMA_STATIC_LINK_LIBS} ${PTHREAD_LIBRARY}) - endif(APPLE) + endif() endif() # # Unit tests diff --git a/cpp/src/skyhook/CMakeLists.txt b/cpp/src/skyhook/CMakeLists.txt index 992c46741325c..0019251bd6071 100644 --- a/cpp/src/skyhook/CMakeLists.txt +++ b/cpp/src/skyhook/CMakeLists.txt @@ -19,7 +19,7 @@ add_subdirectory(client) # define the targets to build -add_custom_target(arrow_skyhook_client) +add_custom_target(arrow_skyhook) add_custom_target(cls_skyhook) # define the dependencies @@ -34,7 +34,7 @@ set(ARROW_SKYHOOK_CLS_SOURCES cls/cls_skyhook.cc protocol/rados_protocol.cc protocol/skyhook_protocol.cc) # define the client library -add_arrow_lib(arrow_skyhook_client +add_arrow_lib(arrow_skyhook PKG_CONFIG_NAME skyhook SOURCES @@ -58,15 +58,15 @@ add_arrow_lib(cls_skyhook ${ARROW_SKYHOOK_LINK_STATIC}) # finish building the project -add_dependencies(arrow_skyhook_client ${ARROW_SKYHOOK_CLIENT_LIBRARIES}) +add_dependencies(arrow_skyhook ${ARROW_SKYHOOK_CLIENT_LIBRARIES}) add_dependencies(cls_skyhook ${ARROW_SKYHOOK_CLS_LIBRARIES}) # define the test builds if(ARROW_TEST_LINKAGE STREQUAL "static") - set(ARROW_SKYHOOK_TEST_LINK_LIBS arrow_skyhook_client_static arrow_dataset_static + set(ARROW_SKYHOOK_TEST_LINK_LIBS arrow_skyhook_static arrow_dataset_static ${ARROW_TEST_STATIC_LINK_LIBS}) else() - set(ARROW_SKYHOOK_TEST_LINK_LIBS arrow_skyhook_client_shared arrow_dataset_shared + set(ARROW_SKYHOOK_TEST_LINK_LIBS arrow_skyhook_shared arrow_dataset_shared ${ARROW_TEST_SHARED_LINK_LIBS}) endif() diff --git a/cpp/src/skyhook/protocol/rados_protocol.cc b/cpp/src/skyhook/protocol/rados_protocol.cc index cb1acec1faa91..eb520787e74fe 100644 --- a/cpp/src/skyhook/protocol/rados_protocol.cc +++ b/cpp/src/skyhook/protocol/rados_protocol.cc @@ -58,7 +58,9 @@ arrow::Status RadosInterface::init2(const char* const name, const char* const cl arrow::Status RadosInterface::ioctx_create(const char* name, IoCtxInterface* pioctx) { librados::IoCtx ioCtx; int ret = cluster->ioctx_create(name, ioCtx); - pioctx->setIoCtx(&ioCtx); + if (!ret) { + pioctx->setIoCtx(&ioCtx); + } return GetStatusFromReturnCode(ret, "rados->ioctx_create failed."); } @@ -85,6 +87,7 @@ arrow::Status RadosConn::Connect() { ARROW_RETURN_NOT_OK(rados->conf_read_file(ctx->ceph_config_path.c_str())); ARROW_RETURN_NOT_OK(rados->connect()); ARROW_RETURN_NOT_OK(rados->ioctx_create(ctx->ceph_data_pool.c_str(), io_ctx.get())); + connected = true; return arrow::Status::OK(); } diff --git a/cpp/src/skyhook/skyhook.pc.in b/cpp/src/skyhook/skyhook.pc.in index 8f7acfa979748..5568d63c560d4 100644 --- a/cpp/src/skyhook/skyhook.pc.in +++ b/cpp/src/skyhook/skyhook.pc.in @@ -23,4 +23,4 @@ Name: Skyhook Description: Skyhook is a plugin for offloading computations into Ceph. 
Version: @SKYHOOK_VERSION@ Requires: arrow_dataset -Libs: -L${libdir} -larrow_skyhook_client +Libs: -L${libdir} -larrow_skyhook diff --git a/cpp/submodules/parquet-testing b/cpp/submodules/parquet-testing index b76cde43bad62..aafd3fc9df431 160000 --- a/cpp/submodules/parquet-testing +++ b/cpp/submodules/parquet-testing @@ -1 +1 @@ -Subproject commit b76cde43bad62ebf531ae3736d7a59cf645d3a6f +Subproject commit aafd3fc9df431c2625a514fb46626e5614f1d199 diff --git a/cpp/thirdparty/flatbuffers/README.md b/cpp/thirdparty/flatbuffers/README.md index e955adba4cebb..447ad818a178a 100644 --- a/cpp/thirdparty/flatbuffers/README.md +++ b/cpp/thirdparty/flatbuffers/README.md @@ -18,7 +18,8 @@ --> This directory contains a vendored version of Flatbuffers -(unknown changeset), with the following patch for ARROW-15388: +(unknown changeset), with two patches: the first patch +for ARROW-15388 and the second patch for ARROW-17280. ```diff diff --git a/cpp/thirdparty/flatbuffers/include/flatbuffers/base.h b/cpp/thirdparty/flatbuffers/include/flatbuffers/base.h @@ -40,3 +41,137 @@ index 955738067..fccce42f6 100644 #endif // __has_include #endif // !FLATBUFFERS_HAS_STRING_VIEW ``` + +```diff +diff --git a/cpp/thirdparty/flatbuffers/include/flatbuffers/base.h b/cpp/thirdparty/flatbuffers/include/flatbuffers/base.h +index fccce42f6..a00d5b0fd 100644 +--- a/cpp/thirdparty/flatbuffers/include/flatbuffers/base.h ++++ b/cpp/thirdparty/flatbuffers/include/flatbuffers/base.h +@@ -1,6 +1,14 @@ + #ifndef FLATBUFFERS_BASE_H_ + #define FLATBUFFERS_BASE_H_ + ++// Move this vendored copy of flatbuffers to a private namespace, ++// but continue to access it through the "flatbuffers" alias. ++namespace arrow_vendored_private { ++namespace flatbuffers { ++} ++} ++namespace flatbuffers = arrow_vendored_private::flatbuffers; ++ + // clang-format off + + // If activate should be declared and included first. +@@ -144,10 +152,12 @@ + #define FLATBUFFERS_VERSION_REVISION 0 + #define FLATBUFFERS_STRING_EXPAND(X) #X + #define FLATBUFFERS_STRING(X) FLATBUFFERS_STRING_EXPAND(X) ++namespace arrow_vendored_private { + namespace flatbuffers { + // Returns version as string "MAJOR.MINOR.REVISION". 
+ const char* FLATBUFFERS_VERSION(); + } ++} + + #if (!defined(_MSC_VER) || _MSC_VER > 1600) && \ + (!defined(__GNUC__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 407)) || \ +@@ -201,16 +211,20 @@ namespace flatbuffers { + // Check for std::string_view (in c++17) + #if __has_include() && (__cplusplus >= 201606 || (defined(_HAS_CXX17) && _HAS_CXX17)) + #include ++ namespace arrow_vendored_private { + namespace flatbuffers { + typedef std::string_view string_view; + } ++ } + #define FLATBUFFERS_HAS_STRING_VIEW 1 + // Check for std::experimental::string_view (in c++14, compiler-dependent) + #elif __has_include() && (__cplusplus >= 201411) + #include ++ namespace arrow_vendored_private { + namespace flatbuffers { + typedef std::experimental::string_view string_view; + } ++ } + #define FLATBUFFERS_HAS_STRING_VIEW 1 + #endif + #endif // __has_include +@@ -278,6 +292,7 @@ template FLATBUFFERS_CONSTEXPR inline bool IsConstTrue(T t) { + /// @endcond + + /// @file ++namespace arrow_vendored_private { + namespace flatbuffers { + + /// @cond FLATBUFFERS_INTERNAL +@@ -388,4 +403,5 @@ inline size_t PaddingBytes(size_t buf_size, size_t scalar_size) { + } + + } // namespace flatbuffers ++} // namespace arrow_vendored_private + #endif // FLATBUFFERS_BASE_H_ +diff --git a/cpp/thirdparty/flatbuffers/include/flatbuffers/flatbuffers.h b/cpp/thirdparty/flatbuffers/include/flatbuffers/flatbuffers.h +index c4dc5bcd0..2f7eb5fcf 100644 +--- a/cpp/thirdparty/flatbuffers/include/flatbuffers/flatbuffers.h ++++ b/cpp/thirdparty/flatbuffers/include/flatbuffers/flatbuffers.h +@@ -23,6 +23,15 @@ + # include + #endif + ++// Move this vendored copy of flatbuffers to a private namespace, ++// but continue to access it through the "flatbuffers" alias. ++namespace arrow_vendored_private { ++namespace flatbuffers { ++} ++} ++namespace flatbuffers = arrow_vendored_private::flatbuffers; ++ ++namespace arrow_vendored_private { + namespace flatbuffers { + // Generic 'operator==' with conditional specialisations. + // T e - new value of a scalar field. +@@ -2777,6 +2786,7 @@ volatile __attribute__((weak)) const char *flatbuffer_version_string = + } + /// @endcond + } // namespace flatbuffers ++} // namespace arrow_vendored_private + + // clang-format on + +diff --git a/cpp/thirdparty/flatbuffers/include/flatbuffers/stl_emulation.h b/cpp/thirdparty/flatbuffers/include/flatbuffers/stl_emulation.h +index 8bae61bfd..7e5a95233 100644 +--- a/cpp/thirdparty/flatbuffers/include/flatbuffers/stl_emulation.h ++++ b/cpp/thirdparty/flatbuffers/include/flatbuffers/stl_emulation.h +@@ -25,6 +25,14 @@ + #include + #include + ++// Move this vendored copy of flatbuffers to a private namespace, ++// but continue to access it through the "flatbuffers" alias. ++namespace arrow_vendored_private { ++namespace flatbuffers { ++} ++} ++namespace flatbuffers = arrow_vendored_private::flatbuffers; ++ + #if defined(_STLPORT_VERSION) && !defined(FLATBUFFERS_CPP98_STL) + #define FLATBUFFERS_CPP98_STL + #endif // defined(_STLPORT_VERSION) && !defined(FLATBUFFERS_CPP98_STL) +@@ -44,6 +52,7 @@ + #endif + + // This header provides backwards compatibility for C++98 STLs like stlport. 
++namespace arrow_vendored_private { + namespace flatbuffers { + + // Retrieve ::back() from a string in a way that is compatible with pre C++11 +@@ -303,5 +312,6 @@ inline void vector_emplace_back(std::vector *vector, V &&data) { + #endif // !FLATBUFFERS_CPP98_STL + + } // namespace flatbuffers ++} // namespace arrow_vendored_private + + #endif // FLATBUFFERS_STL_EMULATION_H_ +-- +2.25.1 +``` diff --git a/cpp/thirdparty/flatbuffers/include/flatbuffers/base.h b/cpp/thirdparty/flatbuffers/include/flatbuffers/base.h index fccce42f68aa6..a00d5b0fd2a59 100644 --- a/cpp/thirdparty/flatbuffers/include/flatbuffers/base.h +++ b/cpp/thirdparty/flatbuffers/include/flatbuffers/base.h @@ -1,6 +1,14 @@ #ifndef FLATBUFFERS_BASE_H_ #define FLATBUFFERS_BASE_H_ +// Move this vendored copy of flatbuffers to a private namespace, +// but continue to access it through the "flatbuffers" alias. +namespace arrow_vendored_private { +namespace flatbuffers { +} +} +namespace flatbuffers = arrow_vendored_private::flatbuffers; + // clang-format off // If activate should be declared and included first. @@ -144,10 +152,12 @@ #define FLATBUFFERS_VERSION_REVISION 0 #define FLATBUFFERS_STRING_EXPAND(X) #X #define FLATBUFFERS_STRING(X) FLATBUFFERS_STRING_EXPAND(X) +namespace arrow_vendored_private { namespace flatbuffers { // Returns version as string "MAJOR.MINOR.REVISION". const char* FLATBUFFERS_VERSION(); } +} #if (!defined(_MSC_VER) || _MSC_VER > 1600) && \ (!defined(__GNUC__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 407)) || \ @@ -201,16 +211,20 @@ namespace flatbuffers { // Check for std::string_view (in c++17) #if __has_include() && (__cplusplus >= 201606 || (defined(_HAS_CXX17) && _HAS_CXX17)) #include + namespace arrow_vendored_private { namespace flatbuffers { typedef std::string_view string_view; } + } #define FLATBUFFERS_HAS_STRING_VIEW 1 // Check for std::experimental::string_view (in c++14, compiler-dependent) #elif __has_include() && (__cplusplus >= 201411) #include + namespace arrow_vendored_private { namespace flatbuffers { typedef std::experimental::string_view string_view; } + } #define FLATBUFFERS_HAS_STRING_VIEW 1 #endif #endif // __has_include @@ -278,6 +292,7 @@ template FLATBUFFERS_CONSTEXPR inline bool IsConstTrue(T t) { /// @endcond /// @file +namespace arrow_vendored_private { namespace flatbuffers { /// @cond FLATBUFFERS_INTERNAL @@ -388,4 +403,5 @@ inline size_t PaddingBytes(size_t buf_size, size_t scalar_size) { } } // namespace flatbuffers +} // namespace arrow_vendored_private #endif // FLATBUFFERS_BASE_H_ diff --git a/cpp/thirdparty/flatbuffers/include/flatbuffers/flatbuffers.h b/cpp/thirdparty/flatbuffers/include/flatbuffers/flatbuffers.h index c4dc5bcd03c57..2f7eb5fcf5197 100644 --- a/cpp/thirdparty/flatbuffers/include/flatbuffers/flatbuffers.h +++ b/cpp/thirdparty/flatbuffers/include/flatbuffers/flatbuffers.h @@ -23,6 +23,15 @@ # include #endif +// Move this vendored copy of flatbuffers to a private namespace, +// but continue to access it through the "flatbuffers" alias. +namespace arrow_vendored_private { +namespace flatbuffers { +} +} +namespace flatbuffers = arrow_vendored_private::flatbuffers; + +namespace arrow_vendored_private { namespace flatbuffers { // Generic 'operator==' with conditional specialisations. // T e - new value of a scalar field. 
@@ -2777,6 +2786,7 @@ volatile __attribute__((weak)) const char *flatbuffer_version_string = } /// @endcond } // namespace flatbuffers +} // namespace arrow_vendored_private // clang-format on diff --git a/cpp/thirdparty/flatbuffers/include/flatbuffers/stl_emulation.h b/cpp/thirdparty/flatbuffers/include/flatbuffers/stl_emulation.h index 8bae61bfd6a31..7e5a95233b0bc 100644 --- a/cpp/thirdparty/flatbuffers/include/flatbuffers/stl_emulation.h +++ b/cpp/thirdparty/flatbuffers/include/flatbuffers/stl_emulation.h @@ -25,6 +25,14 @@ #include #include +// Move this vendored copy of flatbuffers to a private namespace, +// but continue to access it through the "flatbuffers" alias. +namespace arrow_vendored_private { +namespace flatbuffers { +} +} +namespace flatbuffers = arrow_vendored_private::flatbuffers; + #if defined(_STLPORT_VERSION) && !defined(FLATBUFFERS_CPP98_STL) #define FLATBUFFERS_CPP98_STL #endif // defined(_STLPORT_VERSION) && !defined(FLATBUFFERS_CPP98_STL) @@ -44,6 +52,7 @@ #endif // This header provides backwards compatibility for C++98 STLs like stlport. +namespace arrow_vendored_private { namespace flatbuffers { // Retrieve ::back() from a string in a way that is compatible with pre C++11 @@ -303,5 +312,6 @@ inline void vector_emplace_back(std::vector *vector, V &&data) { #endif // !FLATBUFFERS_CPP98_STL } // namespace flatbuffers +} // namespace arrow_vendored_private #endif // FLATBUFFERS_STL_EMULATION_H_ diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index cdb048327ffd1..e7e6590ef9fed 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -68,8 +68,8 @@ ARROW_OPENTELEMETRY_BUILD_VERSION=v1.4.1 ARROW_OPENTELEMETRY_BUILD_SHA256_CHECKSUM=301b1ab74a664723560f46c29f228360aff1e2d63e930b963755ea077ae67524 ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION=v0.17.0 ARROW_OPENTELEMETRY_PROTO_BUILD_SHA256_CHECKSUM=f269fbcb30e17b03caa1decd231ce826e59d7651c0f71c3b28eb5140b4bb5412 -ARROW_ORC_BUILD_VERSION=1.7.5 -ARROW_ORC_BUILD_SHA256_CHECKSUM=b90cae5853e3ea0e5bf64e3883d09d871ec280536e0bb2cc52d08f79f0be0010 +ARROW_ORC_BUILD_VERSION=1.7.6 +ARROW_ORC_BUILD_SHA256_CHECKSUM=a75e0cccaaf5e03f1699bb804f640246bec4d134cb57957125707b0b2822160d ARROW_PROTOBUF_BUILD_VERSION=v21.3 ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM=2f723218f6cb709ae4cdc4fb5ed56a5951fc5d466f0128ce4c946b8c78c8c49f # Because of https://github.com/Tencent/rapidjson/pull/1323, we require diff --git a/cpp/vcpkg.json b/cpp/vcpkg.json index 8019c7395e8a4..915777f08997d 100644 --- a/cpp/vcpkg.json +++ b/cpp/vcpkg.json @@ -1,6 +1,6 @@ { "name": "arrow", - "version-string": "9.0.0-SNAPSHOT", + "version-string": "10.0.0-SNAPSHOT", "dependencies": [ "abseil", { diff --git a/csharp/Directory.Build.props b/csharp/Directory.Build.props index 547fe6a8d8295..51c9e77212369 100644 --- a/csharp/Directory.Build.props +++ b/csharp/Directory.Build.props @@ -29,7 +29,7 @@ Apache Arrow library Copyright 2016-2019 The Apache Software Foundation The Apache Software Foundation - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT diff --git a/dev/archery/archery/bot.py b/dev/archery/archery/bot.py index fb300f554c03d..c548e9a2a473c 100644 --- a/dev/archery/archery/bot.py +++ b/dev/archery/archery/bot.py @@ -259,6 +259,7 @@ def submit(obj, tasks, groups, params, arrow_version): # parse additional job parameters params = dict([p.split("=") for p in params]) + params['pr_number'] = pull_request.number # instantiate the job object job = Job.from_config(config=config, target=target, tasks=tasks, diff --git a/dev/archery/archery/crossbow/cli.py 
b/dev/archery/archery/crossbow/cli.py index 8bdc490958b0b..d34b22bd9e63b 100644 --- a/dev/archery/archery/crossbow/cli.py +++ b/dev/archery/archery/crossbow/cli.py @@ -17,6 +17,7 @@ from pathlib import Path import time +import sys import click @@ -273,16 +274,30 @@ def highlight(code): help='Fetch references (branches and tags) from the remote') @click.option('--task-filter', '-f', 'task_filters', multiple=True, help='Glob pattern for filtering relevant tasks') +@click.option('--validate/--no-validate', default=False, + help='Return non-zero exit code ' + 'if there is any non-success task') @click.pass_obj -def status(obj, job_name, fetch, task_filters): +def status(obj, job_name, fetch, task_filters, validate): output = obj['output'] queue = obj['queue'] if fetch: queue.fetch() job = queue.get(job_name) + success = True + + def asset_callback(task_name, task, asset): + nonlocal success + if task.status().combined_state in {'error', 'failure'}: + success = False + if asset is None: + success = False + report = ConsoleReport(job, task_filters=task_filters) - report.show(output) + report.show(output, asset_callback=asset_callback) + if validate and not success: + sys.exit(1) @crossbow.command() diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index 49f915ec82d93..7f5d1442ffc81 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -675,6 +675,7 @@ def put(self, job, prefix='build', increment_job_id=True): params = { **job.params, "arrow": job.target, + "job": job, "queue_remote_url": self.remote_url } files = task.render_files(job.template_searchpath, params=params) @@ -695,7 +696,7 @@ def get_version(root, **kwargs): # query the calculated version based on the git tags kwargs['describe_command'] = ( - 'git describe --dirty --tags --long --match "apache-arrow-[0-9].*"' + 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"' ) version = parse_git_version(root, **kwargs) tag = str(version.tag) @@ -790,7 +791,7 @@ class Task(Serializable): submitting the job to a queue. 
""" - def __init__(self, ci, template, artifacts=None, params=None): + def __init__(self, name, ci, template, artifacts=None, params=None): assert ci in { 'circle', 'travis', @@ -799,6 +800,7 @@ def __init__(self, ci, template, artifacts=None, params=None): 'github', 'drone', } + self.name = name self.ci = ci self.template = template self.artifacts = artifacts or [] @@ -1011,6 +1013,7 @@ def render_tasks(self, params=None): params = { **self.params, "arrow": self.target, + "job": self, **(params or {}) } for task_name, task in self.tasks.items(): @@ -1081,9 +1084,10 @@ def from_config(cls, config, target, tasks=None, groups=None, params=None): 'no_rc_version': target.no_rc_version, 'no_rc_semver_version': target.no_rc_semver_version} for task_name, task in task_definitions.items(): + task = task.copy() artifacts = task.pop('artifacts', None) or [] # because of yaml artifacts = [fn.format(**versions) for fn in artifacts] - tasks[task_name] = Task(artifacts=artifacts, **task) + tasks[task_name] = Task(task_name, artifacts=artifacts, **task) return cls(target=target, tasks=tasks, params=params, template_searchpath=config.template_searchpath) @@ -1219,7 +1223,7 @@ def validate(self): # validate that the tasks are constructible for task_name, task in self['tasks'].items(): try: - Task(**task) + Task(task_name, **task) except Exception as e: raise CrossbowError( 'Unable to construct a task object from the ' @@ -1236,13 +1240,19 @@ def validate(self): version='1.0.0dev123', email='dummy@example.ltd' ) + job = Job.from_config(config=self, + target=target, + tasks=self['tasks'], + groups=self['groups'], + params={}) for task_name, task in self['tasks'].items(): - task = Task(**task) + task = Task(task_name, **task) files = task.render_files( self.template_searchpath, params=dict( arrow=target, + job=job, queue_remote_url='https://github.com/org/crossbow' ) ) diff --git a/dev/archery/archery/docker/core.py b/dev/archery/archery/docker/core.py index 4d6da095b80a3..600b7be600cbe 100644 --- a/dev/archery/archery/docker/core.py +++ b/dev/archery/archery/docker/core.py @@ -16,7 +16,7 @@ # under the License. 
import os -import re +import shlex import subprocess from io import StringIO @@ -24,6 +24,7 @@ from ruamel.yaml import YAML from ..utils.command import Command, default_bin +from ..utils.source import arrow_path from ..compat import _ensure_path @@ -42,12 +43,6 @@ def flatten(node, parents=None): raise TypeError(node) -def _sanitize_command(cmd): - if isinstance(cmd, list): - cmd = " ".join(cmd) - return re.sub(r"\s+", " ", cmd) - - _arch_short_mapping = { 'arm64v8': 'arm64', } @@ -294,7 +289,7 @@ def _build(service, use_cache): args.extend([ '--output', 'type=docker', - '-f', service['build']['dockerfile'], + '-f', arrow_path(service['build']['dockerfile']), '-t', service['image'], service['build'].get('context', '.') ]) @@ -306,7 +301,7 @@ def _build(service, use_cache): for img in cache_from: args.append('--cache-from="{}"'.format(img)) args.extend([ - '-f', service['build']['dockerfile'], + '-f', arrow_path(service['build']['dockerfile']), '-t', service['image'], service['build'].get('context', '.') ]) @@ -381,10 +376,9 @@ def run(self, service_name, command=None, *, env=None, volumes=None, if command is not None: args.append(command) else: - # replace whitespaces from the preformatted compose command - cmd = _sanitize_command(service.get('command', '')) + cmd = service.get('command', '') if cmd: - args.append(cmd) + args.extend(shlex.split(cmd)) # execute as a plain docker cli command self._execute_docker('run', '--rm', *args) diff --git a/dev/archery/archery/docker/tests/test_docker.py b/dev/archery/archery/docker/tests/test_docker.py index c268bb2312d70..899a0449e1a70 100644 --- a/dev/archery/archery/docker/tests/test_docker.py +++ b/dev/archery/archery/docker/tests/test_docker.py @@ -509,7 +509,7 @@ def test_image_with_gpu(arrow_compose_path): "-e", "OTHER_ENV=2", "-v", "/host:/container:rw", "org/ubuntu-cuda", - '/bin/bash -c "echo 1 > /tmp/dummy && cat /tmp/dummy"' + "/bin/bash", "-c", "echo 1 > /tmp/dummy && cat /tmp/dummy", ] ] with assert_docker_calls(compose, expected_calls): diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index 972f4c826553e..69397fc04123d 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -1587,7 +1587,6 @@ def _temp_path(): generate_primitive_large_offsets_case([17, 20]) .skip_category('C#') - .skip_category('Go') .skip_category('JS'), generate_null_case([10, 0]) @@ -1601,9 +1600,7 @@ def _temp_path(): generate_decimal128_case(), generate_decimal256_case() - .skip_category('Go') # TODO(ARROW-7948): Decimal + Go - .skip_category('JS') - .skip_category('Rust'), + .skip_category('JS'), generate_datetime_case(), @@ -1635,12 +1632,10 @@ def _temp_path(): generate_nested_large_offsets_case() .skip_category('C#') - .skip_category('Go') .skip_category('JS'), generate_unions_case() .skip_category('C#') - .skip_category('Go') .skip_category('JS'), generate_custom_metadata_case() @@ -1652,7 +1647,6 @@ def _temp_path(): .skip_category('Go') .skip_category('JS'), - # TODO(ARROW-3039, ARROW-5267): Dictionaries in GO generate_dictionary_case() .skip_category('C#'), diff --git a/dev/archery/archery/integration/runner.py b/dev/archery/archery/integration/runner.py index e652ff88af58d..05f945cb82416 100644 --- a/dev/archery/archery/integration/runner.py +++ b/dev/archery/archery/integration/runner.py @@ -35,8 +35,8 @@ from .tester_java import JavaTester from .tester_js import JSTester from .tester_csharp import CSharpTester -from .util import (ARROW_ROOT_DEFAULT, 
guid, SKIP_ARROW, SKIP_FLIGHT, - printer) +from .util import guid, SKIP_ARROW, SKIP_FLIGHT, printer +from ..utils.source import ARROW_ROOT_DEFAULT from . import datagen @@ -134,21 +134,18 @@ def _gold_tests(self, gold_dir): skip.add("Java") if prefix == '1.0.0-bigendian' or prefix == '1.0.0-littleendian': skip.add("C#") - skip.add("Go") skip.add("Java") skip.add("JS") skip.add("Rust") if prefix == '2.0.0-compression': skip.add("C#") skip.add("JS") - skip.add("Rust") # See https://github.com/apache/arrow/pull/9822 for how to # disable specific compression type tests. if prefix == '4.0.0-shareddict': skip.add("C#") - skip.add("Go") quirks = set() if prefix in {'0.14.1', '0.17.1', @@ -432,12 +429,11 @@ def run_all_tests(with_cpp=True, with_java=True, with_js=True, Scenario( "middleware", description="Ensure headers are propagated via middleware.", - skip={"Rust"} # TODO(ARROW-10961): tonic upgrade needed ), Scenario( "flight_sql", description="Ensure Flight SQL protocol is working as expected.", - skip={"Rust", "Go"} + skip={"Rust"} ), ] diff --git a/dev/archery/archery/integration/tester_cpp.py b/dev/archery/archery/integration/tester_cpp.py index d59198361b5e9..52cc565dc00a3 100644 --- a/dev/archery/archery/integration/tester_cpp.py +++ b/dev/archery/archery/integration/tester_cpp.py @@ -20,7 +20,8 @@ import subprocess from .tester import Tester -from .util import run_cmd, ARROW_ROOT_DEFAULT, log +from .util import run_cmd, log +from ..utils.source import ARROW_ROOT_DEFAULT _EXE_PATH = os.environ.get( diff --git a/dev/archery/archery/integration/tester_csharp.py b/dev/archery/archery/integration/tester_csharp.py index 144709f2e9547..3f24d5f264126 100644 --- a/dev/archery/archery/integration/tester_csharp.py +++ b/dev/archery/archery/integration/tester_csharp.py @@ -18,7 +18,8 @@ import os from .tester import Tester -from .util import run_cmd, ARROW_ROOT_DEFAULT, log +from .util import run_cmd, log +from ..utils.source import ARROW_ROOT_DEFAULT _EXE_PATH = os.path.join( diff --git a/dev/archery/archery/integration/tester_java.py b/dev/archery/archery/integration/tester_java.py index dc4550d997eb5..45855079eb72e 100644 --- a/dev/archery/archery/integration/tester_java.py +++ b/dev/archery/archery/integration/tester_java.py @@ -20,7 +20,8 @@ import subprocess from .tester import Tester -from .util import run_cmd, ARROW_ROOT_DEFAULT, log +from .util import run_cmd, log +from ..utils.source import ARROW_ROOT_DEFAULT def load_version_from_pom(): @@ -55,10 +56,10 @@ def load_version_from_pom(): ), ) _ARROW_FLIGHT_SERVER = ( - "org.apache.arrow.flight.integration.tests." "IntegrationTestServer" + "org.apache.arrow.flight.integration.tests.IntegrationTestServer" ) _ARROW_FLIGHT_CLIENT = ( - "org.apache.arrow.flight.integration.tests." 
"IntegrationTestClient" + "org.apache.arrow.flight.integration.tests.IntegrationTestClient" ) @@ -70,10 +71,24 @@ class JavaTester(Tester): name = 'Java' + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # Detect whether we're on Java 8 or Java 9+ + self._java_opts = _JAVA_OPTS[:] + proc = subprocess.run( + ['java', '--add-opens'], + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + text=True) + if 'Unrecognized option: --add-opens' not in proc.stderr: + # Java 9+ + self._java_opts.append( + '--add-opens=java.base/java.nio=ALL-UNNAMED') + def _run(self, arrow_path=None, json_path=None, command='VALIDATE'): cmd = ( ['java'] + - _JAVA_OPTS + + self._java_opts + ['-cp', _ARROW_TOOLS_JAR, 'org.apache.arrow.tools.Integration'] ) @@ -98,7 +113,7 @@ def json_to_file(self, json_path, arrow_path): def stream_to_file(self, stream_path, file_path): cmd = ( - ['java'] + _JAVA_OPTS + [ + ['java'] + self._java_opts + [ '-cp', _ARROW_TOOLS_JAR, 'org.apache.arrow.tools.StreamToFile', @@ -112,7 +127,7 @@ def stream_to_file(self, stream_path, file_path): def file_to_stream(self, file_path, stream_path): cmd = ( - ['java'] + _JAVA_OPTS + [ + ['java'] + self._java_opts + [ '-cp', _ARROW_TOOLS_JAR, 'org.apache.arrow.tools.FileToStream', @@ -126,9 +141,10 @@ def file_to_stream(self, file_path, stream_path): def flight_request(self, port, json_path=None, scenario_name=None): cmd = ( - ['java'] + _JAVA_OPTS + ['-cp', _ARROW_FLIGHT_JAR, - _ARROW_FLIGHT_CLIENT, '-port', str(port)] - ) + ['java'] + self._java_opts + [ + '-cp', _ARROW_FLIGHT_JAR, _ARROW_FLIGHT_CLIENT, '-port', str( + port) + ]) if json_path: cmd.extend(('-j', json_path)) @@ -145,7 +161,7 @@ def flight_request(self, port, json_path=None, scenario_name=None): def flight_server(self, scenario_name=None): cmd = ( ['java'] + - _JAVA_OPTS + + self._java_opts + ['-cp', _ARROW_FLIGHT_JAR, _ARROW_FLIGHT_SERVER, '-port', '0'] ) if scenario_name: diff --git a/dev/archery/archery/integration/tester_js.py b/dev/archery/archery/integration/tester_js.py index 80faefb8d866f..6544a1fc6cc3c 100644 --- a/dev/archery/archery/integration/tester_js.py +++ b/dev/archery/archery/integration/tester_js.py @@ -18,7 +18,9 @@ import os from .tester import Tester -from .util import run_cmd, ARROW_ROOT_DEFAULT, log +from .util import run_cmd, log +from ..utils.source import ARROW_ROOT_DEFAULT + _EXE_PATH = os.path.join(ARROW_ROOT_DEFAULT, 'js/bin') _VALIDATE = os.path.join(_EXE_PATH, 'integration.js') diff --git a/dev/archery/archery/integration/tester_rust.py b/dev/archery/archery/integration/tester_rust.py index 0e8943f2a8de4..c7a94de2197bd 100644 --- a/dev/archery/archery/integration/tester_rust.py +++ b/dev/archery/archery/integration/tester_rust.py @@ -20,7 +20,8 @@ import subprocess from .tester import Tester -from .util import run_cmd, ARROW_ROOT_DEFAULT, log +from .util import run_cmd, log +from ..utils.source import ARROW_ROOT_DEFAULT _EXE_PATH = os.path.join(ARROW_ROOT_DEFAULT, "rust/target/debug") diff --git a/dev/archery/archery/integration/util.py b/dev/archery/archery/integration/util.py index a4c4982ecb38f..80ba30052e4da 100644 --- a/dev/archery/archery/integration/util.py +++ b/dev/archery/archery/integration/util.py @@ -17,7 +17,6 @@ import contextlib import io -import os import random import socket import subprocess @@ -36,11 +35,6 @@ def guid(): SKIP_ARROW = 'arrow' SKIP_FLIGHT = 'flight' -ARROW_ROOT_DEFAULT = os.environ.get( - 'ARROW_ROOT', - os.path.abspath(__file__).rsplit("/", 5)[0] -) - class _Printer: """ diff --git 
a/dev/archery/archery/utils/command.py b/dev/archery/archery/utils/command.py index f655e2ef2e58d..c3161164d312e 100644 --- a/dev/archery/archery/utils/command.py +++ b/dev/archery/archery/utils/command.py @@ -24,7 +24,7 @@ def default_bin(name, default): - assert(default) + assert default env_name = "ARCHERY_{0}_BIN".format(default.upper()) return name if name else os.environ.get(env_name, default) diff --git a/dev/archery/archery/utils/source.py b/dev/archery/archery/utils/source.py index 1080cb75d67f5..e8f0ca92c4799 100644 --- a/dev/archery/archery/utils/source.py +++ b/dev/archery/archery/utils/source.py @@ -22,6 +22,19 @@ from .git import git +ARROW_ROOT_DEFAULT = os.environ.get( + 'ARROW_ROOT', + Path(__file__).resolve().parents[4] +) + + +def arrow_path(path): + """ + Return full path to a file given its path inside the Arrow repo. + """ + return os.path.join(ARROW_ROOT_DEFAULT, path) + + class InvalidArrowSource(Exception): pass diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb index b498a29763241..808b48d4c4f70 100644 --- a/dev/release/01-prepare-test.rb +++ b/dev/release/01-prepare-test.rb @@ -169,6 +169,13 @@ def test_version_pre_tag ], ], }, + { + path: "go/arrow/doc.go", + hunks: [ + ["-const PkgVersion = \"#{@snapshot_version}\"", + "+const PkgVersion = \"#{@release_version}\""], + ], + }, { path: "go/parquet/writer_properties.go", hunks: [ @@ -190,6 +197,13 @@ def test_version_pre_tag "+set(MLARROW_VERSION \"#{@release_version}\")"], ], }, + { + path: "python/pyarrow/src/CMakeLists.txt", + hunks: [ + ["-set(ARROW_PYTHON_VERSION \"#{@snapshot_version}\")", + "+set(ARROW_PYTHON_VERSION \"#{@release_version}\")"], + ], + }, { path: "python/setup.py", hunks: [ diff --git a/dev/release/05-binary-upload.sh b/dev/release/05-binary-upload.sh index d6163e7059dac..c8ac64b80c9c2 100755 --- a/dev/release/05-binary-upload.sh +++ b/dev/release/05-binary-upload.sh @@ -127,6 +127,9 @@ docker_run \ APT_TARGETS=$(IFS=,; echo "${apt_targets[*]}") \ ARTIFACTORY_API_KEY="${ARTIFACTORY_API_KEY}" \ ARTIFACTS_DIR="${tmp_dir}/artifacts" \ + DEB_PACKAGE_NAME=${DEB_PACKAGE_NAME:-} \ + DRY_RUN=${DRY_RUN:-no} \ + GPG_KEY_ID="${GPG_KEY_ID}" \ RC=${rc} \ STAGING=${STAGING:-no} \ VERSION=${version} \ diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb index 857517d108b83..d5c072b2d842c 100644 --- a/dev/release/binary-task.rb +++ b/dev/release/binary-task.rb @@ -292,7 +292,7 @@ def close def request(method, headers, url, body: nil, &block) request = build_request(method, url, headers, body: body) - if ENV["DRY_RUN"] + if ENV["DRY_RUN"] == "yes" case request when Net::HTTP::Get, Net::HTTP::Head else @@ -1302,10 +1302,13 @@ def define_apt_rc_tasks Dir.glob("#{source_dir_prefix}*/**/*") do |path| next if File.directory?(path) base_name = File.basename(path) - if base_name.start_with?("apache-arrow-apt-source") - package_name = "apache-arrow-apt-source" - else - package_name = "apache-arrow" + package_name = ENV["DEB_PACKAGE_NAME"] + if package_name.nil? or package_name.empty? 
+ if base_name.start_with?("apache-arrow-apt-source") + package_name = "apache-arrow-apt-source" + else + package_name = "apache-arrow" + end end destination_path = [ pool_dir, diff --git a/dev/release/post-11-bump-versions-test.rb b/dev/release/post-11-bump-versions-test.rb index 0745f692b9c2b..7f7def52800f4 100644 --- a/dev/release/post-11-bump-versions-test.rb +++ b/dev/release/post-11-bump-versions-test.rb @@ -115,6 +115,21 @@ def test_version_post_tag "+ url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@release_version}.9000/apache-arrow-#{@release_version}.9000.tar.gz\""], ], }, + { + path: "docs/source/_static/versions.json", + hunks: [ + [ + "- \"name\": \"10.0 (dev)\",", + "+ \"name\": \"11.0 (dev)\",", + "- \"name\": \"9.0 (stable)\",", + "+ \"name\": \"10.0 (stable)\",", + "+ {", + "+ \"name\": \"9.0\",", + "+ \"version\": \"9.0/\"", + "+ },", + ], + ], + }, { path: "js/package.json", hunks: [ @@ -129,6 +144,13 @@ def test_version_post_tag "+set(MLARROW_VERSION \"#{@next_snapshot_version}\")"], ], }, + { + path: "python/pyarrow/src/CMakeLists.txt", + hunks: [ + ["-set(ARROW_PYTHON_VERSION \"#{@snapshot_version}\")", + "+set(ARROW_PYTHON_VERSION \"#{@next_snapshot_version}\")"], + ], + }, { path: "python/setup.py", hunks: [ @@ -152,9 +174,49 @@ def test_version_post_tag "+# arrow #{@release_version}",], ], }, + { + path: "r/pkgdown/assets/versions.json", + hunks: [ + [ + "- \"name\": \"9.0.0.9000 (dev)\",", + "+ \"name\": \"10.0.0.9000 (dev)\",", + "- \"name\": \"9.0.0 (release)\",", + "+ \"name\": \"10.0.0 (release)\",", + "+ {", + "+ \"name\": \"9.0.0\",", + "+ \"version\": \"9.0/\"", + "+ },", + ], + ], + }, ] Dir.glob("go/**/{go.mod,*.go,*.go.*}") do |path| + if path == "go/arrow/doc.go" + expected_changes << { + path: path, + hunks: [ + [ + "-const PkgVersion = \"#{@snapshot_version}\"", + "+const PkgVersion = \"#{@next_snapshot_version}\"", + ], + ]} + next + elsif path == "go/arrow/compute/go.mod" + expected_changes << { + path: path, + hunks: [ + [ + "-module github.com/apache/arrow/go/v#{@snapshot_major_version}/arrow/compute", + "+module github.com/apache/arrow/go/v#{@next_major_version}/arrow/compute", + "-replace github.com/apache/arrow/go/v#{@snapshot_major_version} => ../../", + "+replace github.com/apache/arrow/go/v#{@next_major_version} => ../../", + "-\tgithub.com/apache/arrow/go/v#{@snapshot_major_version} v#{@snapshot_major_version}.0.0-00010101000000-000000000000", + "+\tgithub.com/apache/arrow/go/v#{@next_major_version} v#{@next_major_version}.0.0-00010101000000-000000000000", + ], + ]} + next + end import_path = "github.com/apache/arrow/go/v#{@snapshot_major_version}" lines = File.readlines(path, chomp: true) target_lines = lines.grep(/#{Regexp.escape(import_path)}/) diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 565296415174f..752d06fd59524 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -69,61 +69,57 @@ dev/tasks/linux-packages/apache-arrow/debian/gir1.2-parquet-1.0.install dev/tasks/linux-packages/apache-arrow/debian/gir1.2-plasma-1.0.install dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-dev.install dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib-dev.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib900.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda900.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib1000.install 
+dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda1000.install dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-dev.install dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-dev.install dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.doc-base dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.install dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.links -dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib900.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset900.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib1000.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset1000.install dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-dev.install dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-dev.install dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.doc-base dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.install dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib-doc.links -dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib900.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib1000.install dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-dev.install dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib-dev.install dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib-doc.doc-base dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib-doc.install dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib-doc.links -dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib900.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql900.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight900.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib1000.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql1000.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight1000.install dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-dev.install dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.doc-base dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.install dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.links -dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib900.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight900.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-python900.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow900.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib1000.install +dev/tasks/linux-packages/apache-arrow/debian/libarrow1000.install dev/tasks/linux-packages/apache-arrow/debian/libgandiva-dev.install dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-dev.install dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.doc-base dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.install dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.links -dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib900.install 
-dev/tasks/linux-packages/apache-arrow/debian/libgandiva900.install +dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib1000.install +dev/tasks/linux-packages/apache-arrow/debian/libgandiva1000.install dev/tasks/linux-packages/apache-arrow/debian/libparquet-dev.install dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-dev.install dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.doc-base dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.install dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.links -dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib900.install -dev/tasks/linux-packages/apache-arrow/debian/libparquet900.install +dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib1000.install +dev/tasks/linux-packages/apache-arrow/debian/libparquet1000.install dev/tasks/linux-packages/apache-arrow/debian/libplasma-dev.install dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-dev.install dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.doc-base dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.install dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.links -dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib900.install -dev/tasks/linux-packages/apache-arrow/debian/libplasma900.install +dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib1000.install +dev/tasks/linux-packages/apache-arrow/debian/libplasma1000.install dev/tasks/linux-packages/apache-arrow/debian/patches/series dev/tasks/linux-packages/apache-arrow/debian/plasma-store-server.install dev/tasks/linux-packages/apache-arrow/debian/rules @@ -140,8 +136,10 @@ go/arrow/flight/internal/flight/Flight_grpc.pb.go go/arrow/internal/cpu/* go/arrow/type_string.go go/arrow/cdata/test/go.sum +go/arrow/unionmode_string.go +go/arrow/compute/go.sum go/arrow/compute/datumkind_string.go -go/arrow/compute/valueshape_string.go +go/arrow/compute/funckind_string.go go/*.tmpldata go/*.s go/parquet/internal/gen-go/parquet/GoUnusedProtection__.go diff --git a/dev/release/utils-prepare.sh b/dev/release/utils-prepare.sh index 609325da01cc6..f6ee589f15788 100644 --- a/dev/release/utils-prepare.sh +++ b/dev/release/utils-prepare.sh @@ -121,6 +121,14 @@ update_versions() { git add setup.py popd + pushd "${ARROW_DIR}/python/pyarrow/src" + sed -i.bak -E -e \ + "s/^set\(ARROW_PYTHON_VERSION \".+\"\)/set(ARROW_PYTHON_VERSION \"${version}\")/" \ + CMakeLists.txt + rm -f CMakeLists.txt.bak + git add CMakeLists.txt + popd + pushd "${ARROW_DIR}/r" sed -i.bak -E -e \ "s/^Version: .+/Version: ${r_version}/" \ @@ -156,20 +164,27 @@ update_versions() { sed -i.bak -E -e \ "s/\"parquet-go version .+\"/\"parquet-go version ${version}\"/" \ parquet/writer_properties.go + sed -i.bak -E -e \ + "s/const PkgVersion = \".*/const PkgVersion = \"${version}\"/" \ + arrow/doc.go + # handle the pseudo version in the compute sub-module for now + # subsequent changes will allow this to remove the pseudo version but + # for now we have to overcome the slight conflict between the existing + # "compute" package and the new go.mod file. + sed -i.bak -E -e \ + "s|v[0-9]+\\.0\\.0-00010101000000-000000000000|v${major_version}.0.0-00010101000000-000000000000|" \ + arrow/compute/go.mod + find . -name "*.bak" -exec rm {} \; git add . popd - case ${type} in - release) - pushd "${ARROW_DIR}" - ${PYTHON:-python3} "dev/release/utils-update-docs-versions.py" \ - . 
\ - "${version}" \ - "${next_version}" - git add docs/source/_static/versions.json - git add r/pkgdown/assets/versions.json - popd - ;; - esac + pushd "${ARROW_DIR}" + ${PYTHON:-python3} "dev/release/utils-update-docs-versions.py" \ + . \ + "${base_version}" \ + "${next_version}" + git add docs/source/_static/versions.json + git add r/pkgdown/assets/versions.json + popd } diff --git a/dev/release/verify-apt.sh b/dev/release/verify-apt.sh index 846d9c7ec3b8a..5de5209a55ce5 100755 --- a/dev/release/verify-apt.sh +++ b/dev/release/verify-apt.sh @@ -158,6 +158,8 @@ echo "::endgroup::" echo "::group::Test Apache Arrow GLib" +export G_DEBUG=fatal-warnings + ${APT_INSTALL} libarrow-glib-dev=${package_version} ${APT_INSTALL} libarrow-glib-doc=${package_version} @@ -196,13 +198,6 @@ ruby -r gi -e "p GI.load('ArrowFlightSQL')" echo "::endgroup::" -if [ "${have_python}" = "yes" ]; then - echo "::group::Test libarrow-python" - ${APT_INSTALL} libarrow-python-dev=${package_version} - echo "::endgroup::" -fi - - if [ "${have_plasma}" = "yes" ]; then echo "::group::Test Plasma" ${APT_INSTALL} libplasma-glib-dev=${package_version} diff --git a/dev/release/verify-yum.sh b/dev/release/verify-yum.sh index 889dddd9d1b9e..6d40bccf2bc57 100755 --- a/dev/release/verify-yum.sh +++ b/dev/release/verify-yum.sh @@ -46,12 +46,14 @@ repository_version="${distribution_version}" cmake_package=cmake cmake_command=cmake +have_arrow_libs=no have_flight=yes have_gandiva=yes have_glib=yes have_parquet=yes have_python=yes -have_arrow_libs=no +have_ruby=yes +ruby_devel_packages=(ruby-devel) install_command="dnf install -y --enablerepo=crb" uninstall_command="dnf remove -y" clean_command="dnf clean" @@ -63,6 +65,7 @@ case "${distribution}-${distribution_version}" in almalinux-8) distribution_prefix="almalinux" have_arrow_libs=yes + ruby_devel_packages+=(redhat-rpm-config) install_command="dnf install -y --enablerepo=powertools" info_command="dnf info --enablerepo=powertools" ;; @@ -73,12 +76,13 @@ case "${distribution}-${distribution_version}" in distribution_prefix="amazon-linux" cmake_package=cmake3 cmake_command=cmake3 - have_flight=no - have_gandiva=no - have_python=no if [ "$(arch)" != "aarch64" ]; then have_arrow_libs=yes fi + have_flight=no + have_gandiva=no + have_python=no + have_ruby=no install_command="yum install -y" uninstall_command="yum remove -y" clean_command="yum clean" @@ -89,10 +93,11 @@ case "${distribution}-${distribution_version}" in distribution_prefix="centos" cmake_package=cmake3 cmake_command=cmake3 + have_arrow_libs=yes have_flight=no have_gandiva=no have_python=no - have_arrow_libs=yes + have_ruby=no install_command="yum install -y" uninstall_command="yum remove -y" clean_command="yum clean" @@ -101,6 +106,7 @@ case "${distribution}-${distribution_version}" in centos-8) distribution_prefix="centos" repository_version+="-stream" + ruby_devel_packages+=(redhat-rpm-config) install_command="dnf install -y --enablerepo=powertools" info_command="dnf info --enablerepo=powertools" ;; @@ -206,6 +212,8 @@ echo "::endgroup::" if [ "${have_glib}" = "yes" ]; then echo "::group::Test Apache Arrow GLib" + export G_DEBUG=fatal-warnings + ${install_command} --enablerepo=epel arrow-glib-devel-${package_version} ${install_command} --enablerepo=epel arrow-glib-doc-${package_version} @@ -215,6 +223,12 @@ if [ "${have_glib}" = "yes" ]; then valac --pkg arrow-glib --pkg posix build.vala ./build popd + + if [ "${have_ruby}" = "yes" ]; then + ${install_command} "${ruby_devel_packages[@]}" + gem install gobject-introspection 
+ ruby -r gi -e "p GI.load('Arrow')" + fi echo "::endgroup::" fi @@ -222,17 +236,17 @@ if [ "${have_flight}" = "yes" ]; then echo "::group::Test Apache Arrow Flight" ${install_command} --enablerepo=epel arrow-flight-glib-devel-${package_version} ${install_command} --enablerepo=epel arrow-flight-glib-doc-${package_version} + if [ "${have_ruby}" = "yes" ]; then + ruby -r gi -e "p GI.load('ArrowFlight')" + fi echo "::endgroup::" echo "::group::Test Apache Arrow Flight SQL" ${install_command} --enablerepo=epel arrow-flight-sql-glib-devel-${package_version} ${install_command} --enablerepo=epel arrow-flight-sql-glib-doc-${package_version} - echo "::endgroup::" -fi - -if [ "${have_python}" = "yes" ]; then - echo "::group::Test libarrow-python" - ${install_command} --enablerepo=epel arrow-python-devel-${package_version} + if [ "${have_ruby}" = "yes" ]; then + ruby -r gi -e "p GI.load('ArrowFlightSQL')" + fi echo "::endgroup::" fi @@ -240,6 +254,9 @@ echo "::group::Test Plasma" if [ "${have_glib}" = "yes" ]; then ${install_command} --enablerepo=epel plasma-glib-devel-${package_version} ${install_command} --enablerepo=epel plasma-glib-doc-${package_version} + if [ "${have_ruby}" = "yes" ]; then + ruby -r gi -e "p GI.load('Plasma')" + fi else ${install_command} --enablerepo=epel plasma-devel-${package_version} fi @@ -250,6 +267,9 @@ if [ "${have_gandiva}" = "yes" ]; then if [ "${have_glib}" = "yes" ]; then ${install_command} --enablerepo=epel gandiva-glib-devel-${package_version} ${install_command} --enablerepo=epel gandiva-glib-doc-${package_version} + if [ "${have_ruby}" = "yes" ]; then + ruby -r gi -e "p GI.load('Gandiva')" + fi else ${install_command} --enablerepo=epel gandiva-devel-${package_version} fi @@ -261,6 +281,9 @@ if [ "${have_parquet}" = "yes" ]; then if [ "${have_glib}" = "yes" ]; then ${install_command} --enablerepo=epel parquet-glib-devel-${package_version} ${install_command} --enablerepo=epel parquet-glib-doc-${package_version} + if [ "${have_ruby}" = "yes" ]; then + ruby -r gi -e "p GI.load('Parquet')" + fi else ${install_command} --enablerepo=epel parquet-devel-${package_version} fi diff --git a/dev/tasks/conda-recipes/arrow-cpp/meta.yaml b/dev/tasks/conda-recipes/arrow-cpp/meta.yaml index dcf1afe4a1a55..8f1ba74e3afaa 100644 --- a/dev/tasks/conda-recipes/arrow-cpp/meta.yaml +++ b/dev/tasks/conda-recipes/arrow-cpp/meta.yaml @@ -123,7 +123,6 @@ outputs: - test -f $PREFIX/lib/libarrow.so # [linux] - test -f $PREFIX/lib/libarrow_dataset.so # [linux] - test -f $PREFIX/lib/libarrow_flight.so # [linux] - - test -f $PREFIX/lib/libarrow_python.so # [linux] - test -f $PREFIX/lib/libparquet.so # [linux] - test -f $PREFIX/lib/libgandiva.so # [linux] - test -f $PREFIX/lib/libplasma.so # [linux] @@ -133,14 +132,12 @@ outputs: - if exist %PREFIX%\\Library\\bin\\arrow_cuda.dll exit 1 # [(cuda_compiler_version == "None") and win] - test -f $PREFIX/lib/libarrow.dylib # [osx] - test -f $PREFIX/lib/libarrow_dataset.dylib # [osx] - - test -f $PREFIX/lib/libarrow_python.dylib # [osx] - test -f $PREFIX/lib/libgandiva.dylib # [osx and not arm64] - test -f $PREFIX/lib/libparquet.dylib # [osx] - test -f $PREFIX/lib/libplasma.dylib # [osx] - if not exist %PREFIX%\\Library\\bin\\arrow.dll exit 1 # [win] - if not exist %PREFIX%\\Library\\bin\\arrow_dataset.dll exit 1 # [win] - if not exist %PREFIX%\\Library\\bin\\arrow_flight.dll exit 1 # [win] - - if not exist %PREFIX%\\Library\\bin\\arrow_python.dll exit 1 # [win] - if not exist %PREFIX%\\Library\\bin\\parquet.dll exit 1 # [win] - if not exist 
%PREFIX%\\Library\\bin\\gandiva.dll exit 1 # [win] @@ -148,14 +145,12 @@ outputs: - test ! -f $PREFIX/lib/libarrow.a # [unix] - test ! -f $PREFIX/lib/libarrow_dataset.a # [unix] - test ! -f $PREFIX/lib/libarrow_flight.a # [unix] - - test ! -f $PREFIX/lib/libarrow_python.a # [unix] - test ! -f $PREFIX/lib/libplasma.a # [unix] - test ! -f $PREFIX/lib/libparquet.a # [unix] - test ! -f $PREFIX/lib/libgandiva.a # [unix] - if exist %PREFIX%\\Library\\lib\\arrow_static.lib exit 1 # [win] - if exist %PREFIX%\\Library\\lib\\arrow_dataset_static.lib exit 1 # [win] - if exist %PREFIX%\\Library\\lib\\arrow_flight_static.lib exit 1 # [win] - - if exist %PREFIX%\\Library\\lib\\arrow_python_static.lib exit 1 # [win] - if exist %PREFIX%\\Library\\lib\\parquet_static.lib exit 1 # [win] - if exist %PREFIX%\\Library\\lib\\gandiva_static.lib exit 1 # [win] diff --git a/dev/tasks/conda-recipes/azure.osx.yml b/dev/tasks/conda-recipes/azure.osx.yml index 505a17cebee01..404d6daee3bb1 100755 --- a/dev/tasks/conda-recipes/azure.osx.yml +++ b/dev/tasks/conda-recipes/azure.osx.yml @@ -3,7 +3,7 @@ jobs: - job: osx pool: - vmImage: macOS-10.15 + vmImage: macOS-latest timeoutInMinutes: 360 variables: CONFIG: {{ config }} diff --git a/dev/tasks/docker-tests/github.linux.yml b/dev/tasks/docker-tests/github.linux.yml index 41d54981047d9..f7fd6a0be6eff 100644 --- a/dev/tasks/docker-tests/github.linux.yml +++ b/dev/tasks/docker-tests/github.linux.yml @@ -24,14 +24,9 @@ jobs: name: | Docker Test {{ flags|default("") }} {{ image }} {{ command|default("") }} runs-on: ubuntu-latest - {% if env is defined %} - env: - {% for key, value in env.items() %} - {{ key }}: "{{ value }}" - {% endfor %} - {% endif %} +{{ macros.github_set_env(env) }} steps: - {{ macros.github_checkout_arrow(fetch_depth=fetch_depth if fetch_depth is defined else 1)|indent }} + {{ macros.github_checkout_arrow(fetch_depth=fetch_depth|default(1))|indent }} {{ macros.github_install_archery()|indent }} - name: Execute Docker Build diff --git a/dev/tasks/docs/github.linux.yml b/dev/tasks/docs/github.linux.yml new file mode 100644 index 0000000000000..853729dd9a5ae --- /dev/null +++ b/dev/tasks/docs/github.linux.yml @@ -0,0 +1,66 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +{% import 'macros.jinja' as macros with context %} +{% set pr_number = pr_number|replace("Unset", arrow.branch) %} +{{ macros.github_header() }} + +jobs: + test: + name: Docs Preview + runs-on: ubuntu-latest +{{ macros.github_set_env(env) }} + steps: + {{ macros.github_checkout_arrow(fetch_depth=fetch_depth|default(1))|indent }} + {{ macros.github_install_archery()|indent }} + + - name: Execute Docker Build + shell: bash + env: + ARROW_JAVA_SKIP_GIT_PLUGIN: true + run: | + archery docker run \ + -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" \ + {{ flags|default("") }} \ + {{ image }} \ + {{ command|default("") }} + - name: Checkout Crossbow + uses: actions/checkout@v3 + with: + ref: {{ default_branch|default("master") }} + path: crossbow + fetch-depth: 1 + - name: Move docs + run: | + # build files are created by the docker user + sudo chown -R ${USER}: build + PR_DIR=crossbow/docs/pr_docs/{{ pr_number }} + mkdir -p $PR_DIR + rsync -a --delete build/docs/ $PR_DIR + - name: Push changes + run: | + cd crossbow + git config --local user.name "Github Actions" + git config --local user.email "github-actions@users.noreply.github.com" + + URL_PATH=pr_docs/{{ pr_number }} + URL=https://crossbow.voltrondata.com/$URL_PATH + + git add docs/$URL_PATH + git commit -m "Add docs preview for PR {{ pr_number }}" + git push + echo ":open_book: You can find the preview here: $URL" >> $GITHUB_STEP_SUMMARY diff --git a/dev/tasks/fuzz-tests/github.oss-fuzz.yml b/dev/tasks/fuzz-tests/github.oss-fuzz.yml index e49d73d8d11e7..d7cf516266831 100644 --- a/dev/tasks/fuzz-tests/github.oss-fuzz.yml +++ b/dev/tasks/fuzz-tests/github.oss-fuzz.yml @@ -23,12 +23,7 @@ jobs: test: name: OSS-Fuzz build test runs-on: ubuntu-latest - {% if env is defined %} - env: - {% for key, value in env.items() %} - {{ key }}: "{{ value }}" - {% endfor %} - {% endif %} +{{ macros.github_set_env(env) }} steps: {{ macros.github_checkout_arrow()|indent }} diff --git a/dev/tasks/homebrew-formulae/apache-arrow-glib.rb b/dev/tasks/homebrew-formulae/apache-arrow-glib.rb index c8b9713cec3e0..520ff41aec4dd 100644 --- a/dev/tasks/homebrew-formulae/apache-arrow-glib.rb +++ b/dev/tasks/homebrew-formulae/apache-arrow-glib.rb @@ -29,7 +29,7 @@ class ApacheArrowGlib < Formula desc "GLib bindings for Apache Arrow" homepage "https://arrow.apache.org/" - url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-9.0.0-SNAPSHOT/apache-arrow-9.0.0-SNAPSHOT.tar.gz" + url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-10.0.0-SNAPSHOT/apache-arrow-10.0.0-SNAPSHOT.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" license "Apache-2.0" head "https://github.com/apache/arrow.git" diff --git a/dev/tasks/homebrew-formulae/apache-arrow.rb b/dev/tasks/homebrew-formulae/apache-arrow.rb index 4908c22ed6494..edb86d659f490 100644 --- a/dev/tasks/homebrew-formulae/apache-arrow.rb +++ b/dev/tasks/homebrew-formulae/apache-arrow.rb @@ -29,7 +29,7 @@ class ApacheArrow < Formula desc "Columnar in-memory analytics layer designed to accelerate big data" homepage "https://arrow.apache.org/" - url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-9.0.0-SNAPSHOT/apache-arrow-9.0.0-SNAPSHOT.tar.gz" + url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-10.0.0-SNAPSHOT/apache-arrow-10.0.0-SNAPSHOT.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" license "Apache-2.0" head "https://github.com/apache/arrow.git" diff --git a/dev/tasks/homebrew-formulae/autobrew/apache-arrow-static.rb 
b/dev/tasks/homebrew-formulae/autobrew/apache-arrow-static.rb new file mode 100644 index 0000000000000..79d1da2c212d4 --- /dev/null +++ b/dev/tasks/homebrew-formulae/autobrew/apache-arrow-static.rb @@ -0,0 +1,98 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Cloned from https://github.com/autobrew/homebrew-core/blob/high-sierra/Formula/apache-arrow.rb +# Upstream any relevant changes (dependencies, build args ...) + +class ApacheArrowStatic < Formula + desc "Columnar in-memory analytics layer designed to accelerate big data" + homepage "https://arrow.apache.org/" + url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-8.0.0.9000/apache-arrow-8.0.0.9000.tar.gz" + # Uncomment and update to test on a release candidate + # mirror "https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-8.0.0-rc1/apache-arrow-8.0.0.tar.gz" + sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" + head "https://github.com/apache/arrow.git" + + bottle do + sha256 cellar: :any, arm64_big_sur: "ef89d21a110b89840cc6148add685d407e75bd633bc8f79625eb33d00e3694b4" + sha256 cellar: :any, big_sur: "6fcb9f55d44eb61d295a8020e039a0622bdc044b220cfffef855f3e3ab8057a1" + sha256 cellar: :any, catalina: "bf71b17766688077fb9b4e679f309742c16524015287dd3633758c679752c977" + end + + depends_on "boost" => :build + depends_on "cmake" => :build + depends_on "aws-sdk-cpp-static" + depends_on "brotli" + depends_on "lz4" + depends_on "snappy" + depends_on "thrift" + depends_on "zstd" + + conflicts_with "apache-arrow", because: "both install Arrow" + + def install + ENV.cxx11 + # https://github.com/Homebrew/homebrew-core/issues/94724 + # https://issues.apache.org/jira/browse/ARROW-15664 + ENV["HOMEBREW_OPTIMIZATION_LEVEL"] = "O2" + + args = %W[ + -DARROW_BUILD_SHARED=OFF + -DARROW_BUILD_UTILITIES=ON + -DARROW_COMPUTE=ON + -DARROW_CSV=ON + -DARROW_DATASET=ON + -DARROW_FILESYSTEM=ON + -DARROW_GCS=ON + -DARROW_JEMALLOC=ON + -DARROW_JSON=ON + -DARROW_MIMALLOC=ON + -DARROW_PARQUET=ON + -DARROW_S3=ON + -DARROW_VERBOSE_THIRDPARTY_BUILD=ON + -DARROW_WITH_BROTLI=ON + -DARROW_WITH_BZ2=ON + -DARROW_WITH_LZ4=ON + -DARROW_WITH_SNAPPY=ON + -DARROW_WITH_ZLIB=ON + -DARROW_WITH_ZSTD=ON + -DLZ4_HOME=#{Formula["lz4"].prefix} + -DPARQUET_BUILD_EXECUTABLES=ON + -DTHRIFT_HOME=#{Formula["thrift"].prefix} + ] + + mkdir "build" + cd "build" do + system "cmake", "../cpp", *std_cmake_args, *args + system "make" + system "make", "install" + end + end + + test do + (testpath/"test.cpp").write <<~EOS + #include "arrow/api.h" + int main(void) { + arrow::int64(); + return 0; + } + EOS + system ENV.cxx, "test.cpp", "-std=c++11", "-I#{include}", "-L#{lib}", \ + "-larrow", "-larrow_bundled_dependencies", "-o", "test" + system "./test" + end +end diff --git 
a/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb b/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb index dde994ab43396..de0c65dae40e6 100644 --- a/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb +++ b/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb @@ -16,24 +16,25 @@ # under the License. # https://github.com/autobrew/homebrew-core/blob/master/Formula/apache-arrow.rb + class ApacheArrow < Formula desc "Columnar in-memory analytics layer designed to accelerate big data" homepage "https://arrow.apache.org/" - url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-8.0.0.9000/apache-arrow-8.0.0.9000.tar.gz" + url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-9.0.0.9000/apache-arrow-9.0.0.9000.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" head "https://github.com/apache/arrow.git" bottle do cellar :any - sha256 "a55211ba6f464681b7ca1b48defdad9cfbe1cf6fad8ff9ec875dc5a3c8f3c5ed" => :el_capitan_or_later + sha256 "9cd44700798638b5e3ee8774b3929f3fad815290d05572d1f39f01d6423eaad0" => :high_sierra root_url "https://autobrew.github.io/bottles" end # NOTE: if you add something here, be sure to add to PKG_LIBS in r/tools/autobrew depends_on "boost" => :build - depends_on "brotli" depends_on "cmake" => :build depends_on "aws-sdk-cpp" + depends_on "brotli" depends_on "lz4" depends_on "snappy" depends_on "thrift" @@ -49,14 +50,11 @@ def install -DARROW_DATASET=ON -DARROW_FILESYSTEM=ON -DARROW_GCS=ON - -DARROW_HDFS=OFF -DARROW_JEMALLOC=ON -DARROW_JSON=ON -DARROW_MIMALLOC=ON -DARROW_PARQUET=ON - -DARROW_PYTHON=OFF -DARROW_S3=ON - -DARROW_USE_GLOG=OFF -DARROW_VERBOSE_THIRDPARTY_BUILD=ON -DARROW_WITH_BROTLI=ON -DARROW_WITH_BZ2=ON @@ -64,9 +62,8 @@ def install -DARROW_WITH_SNAPPY=ON -DARROW_WITH_ZLIB=ON -DARROW_WITH_ZSTD=ON - -DCMAKE_UNITY_BUILD=OFF - -DPARQUET_BUILD_EXECUTABLES=ON -DLZ4_HOME=#{Formula["lz4"].prefix} + -DPARQUET_BUILD_EXECUTABLES=ON -DTHRIFT_HOME=#{Formula["thrift"].prefix} ] @@ -86,7 +83,8 @@ def install return 0; } EOS - system ENV.cxx, "test.cpp", "-std=c++11", "-I#{include}", "-L#{lib}", "-larrow", "-o", "test" + system ENV.cxx, "test.cpp", "-std=c++11", "-I#{include}", "-L#{lib}", \ + "-larrow", "-larrow_bundled_dependencies", "-o", "test" system "./test" end end diff --git a/dev/tasks/homebrew-formulae/github.macos.yml b/dev/tasks/homebrew-formulae/github.macos.yml index 3abb1d4525298..fd69473757324 100644 --- a/dev/tasks/homebrew-formulae/github.macos.yml +++ b/dev/tasks/homebrew-formulae/github.macos.yml @@ -22,7 +22,7 @@ jobs: homebrew: name: "Homebrew" - runs-on: macOS-latest + runs-on: macos-latest steps: {{ macros.github_checkout_arrow()|indent }} diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml index 376ba78fe14db..23b97087c394b 100644 --- a/dev/tasks/java-jars/github.yml +++ b/dev/tasks/java-jars/github.yml @@ -28,7 +28,11 @@ jobs: {{ macros.github_checkout_arrow()|indent }} {{ macros.github_install_archery()|indent }} - name: Build C++ Libs - run: archery docker run java-jni-manylinux-2014 + run: | + archery docker run \ + -e ARROW_JAVA_BUILD=OFF \ + -e ARROW_JAVA_TEST=OFF \ + java-jni-manylinux-2014 - name: Compress into single artifact to keep directory structure run: tar -cvzf arrow-shared-libs-linux.tar.gz arrow/java-dist/ - name: Upload Artifacts @@ -61,6 +65,9 @@ jobs: # aws-sdk-cpp and bundled aws-sdk-cpp. We uninstall Homebrew's # aws-sdk-cpp to ensure using only bundled aws-sdk-cpp. 
brew uninstall aws-sdk-cpp + - name: Setup ccache + run: | + arrow/ci/scripts/ccache_setup.sh - name: Build C++ Libs run: | set -e @@ -100,11 +107,13 @@ jobs: test -f arrow/java-dist/libarrow_cdata_jni.dylib test -f arrow/java-dist/libarrow_dataset_jni.dylib test -f arrow/java-dist/libgandiva_jni.dylib + test -f arrow/java-dist/libplasma_java.dylib test -f arrow/java-dist/libarrow_orc_jni.dylib test -f arrow/java-dist/libarrow_cdata_jni.so test -f arrow/java-dist/libarrow_dataset_jni.so test -f arrow/java-dist/libarrow_orc_jni.so test -f arrow/java-dist/libgandiva_jni.so + test -f arrow/java-dist/libplasma_java.so - name: Build Bundled Jar run: | set -e diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog index a55ce7dad7447..d447f95eaa47a 100644 --- a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow-apt-source (9.0.0-1) unstable; urgency=low + + * New upstream release. + + -- Krisztián Szűcs Fri, 29 Jul 2022 16:41:29 -0000 + apache-arrow-apt-source (8.0.0-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in index a3f3b48f735f6..d231eb2767a17 100644 --- a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in +++ b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in @@ -95,6 +95,9 @@ else fi %changelog +* Fri Jul 29 2022 Krisztián Szűcs - 9.0.0-1 +- New upstream release. + * Tue May 03 2022 Krisztián Szűcs - 8.0.0-1 - New upstream release. 
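
Note on the `github_set_env` macro used in the templates above and below: it replaces the inline `{% if env is defined %} ... {% endif %}` block that each workflow template previously carried, and is defined in dev/tasks/macros.jinja later in this diff. As a minimal sketch only, assuming a hypothetical `env` mapping with two illustrative keys, a call such as

    {{ macros.github_set_env(env) }}

    # with env = {"UBUNTU": "20.04", "ARCH": "amd64"} would render
    # (modulo Jinja whitespace trimming) roughly as:
    env:
      UBUNTU: "20.04"
      ARCH: "amd64"

so each generated workflow gets the same job-level env block without every template repeating the loop.
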
diff --git a/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm/Dockerfile index 42a7327614c14..0b50d887128c7 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm/Dockerfile @@ -53,7 +53,6 @@ RUN \ libgoogle-glog-dev \ libgrpc++-dev \ libgtest-dev \ - libjemalloc-dev \ liblz4-dev \ libprotoc-dev \ libprotobuf-dev \ @@ -70,7 +69,6 @@ RUN \ pkg-config \ protobuf-compiler-grpc \ python3-dev \ - python3-numpy \ python3-pip \ rapidjson-dev \ tzdata \ diff --git a/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile index 1562f1d662c75..600ecc6962e9b 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile @@ -53,7 +53,6 @@ RUN \ libgoogle-glog-dev \ libgrpc++-dev \ libgtest-dev \ - libjemalloc-dev \ liblz4-dev \ libprotoc-dev \ libprotobuf-dev \ @@ -70,7 +69,6 @@ RUN \ pkg-config \ protobuf-compiler-grpc \ python3-dev \ - python3-numpy \ python3-pip \ rapidjson-dev \ tzdata \ diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile index 64ebb3b72dd8a..b897fa73a33de 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile @@ -48,7 +48,6 @@ RUN \ libglib2.0-doc \ libgoogle-glog-dev \ libgtest-dev \ - libjemalloc-dev \ liblz4-dev \ libre2-dev \ libsnappy-dev \ diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile index 9efc5602687b3..cea9eeb6fac72 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile @@ -50,7 +50,6 @@ RUN \ libgmock-dev \ libgoogle-glog-dev \ libgtest-dev \ - libjemalloc-dev \ liblz4-dev \ libre2-dev \ libsnappy-dev \ @@ -64,7 +63,6 @@ RUN \ nlohmann-json3-dev \ pkg-config \ python3-dev \ - python3-numpy \ python3-pip \ python3-setuptools \ rapidjson-dev \ diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-jammy/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-jammy/Dockerfile index e6f657533f8e2..e305479f721b5 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-jammy/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-jammy/Dockerfile @@ -51,7 +51,6 @@ RUN \ libgoogle-glog-dev \ libgrpc++-dev \ libgtest-dev \ - libjemalloc-dev \ liblz4-dev \ libprotoc-dev \ libprotobuf-dev \ @@ -68,7 +67,6 @@ RUN \ pkg-config \ protobuf-compiler-grpc \ python3-dev \ - python3-numpy \ python3-pip \ python3-setuptools \ rapidjson-dev \ diff --git a/dev/tasks/linux-packages/apache-arrow/debian/changelog b/dev/tasks/linux-packages/apache-arrow/debian/changelog index 1af102bfd4344..208a8e247b62b 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow (9.0.0-1) unstable; urgency=low + + * New upstream release. + + -- Krisztián Szűcs Fri, 29 Jul 2022 16:41:29 -0000 + apache-arrow (8.0.0-1) unstable; urgency=low * New upstream release. 
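
Note on the package renames in the Debian control and .install files that follow: the numeric suffix in libarrow900, libparquet900, etc. is the Arrow C++ shared-library SO version, and the move to the 1000 suffix tracks the development version bump to 10.0.0 (see the 10.0.0-SNAPSHOT Homebrew URLs earlier in this diff). Assuming the usual Arrow SO-versioning scheme of 100 * major + minor, the arithmetic is:

    so_version = 100 * major + minor
    9.0.0  -> 100 * 9  + 0 = 900    (libarrow.so.900  -> package libarrow900)
    10.0.0 -> 100 * 10 + 0 = 1000   (libarrow.so.1000 -> package libarrow1000)
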
diff --git a/dev/tasks/linux-packages/apache-arrow/debian/control.in b/dev/tasks/linux-packages/apache-arrow/debian/control.in index 8de71bda75590..1e05faf282895 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/control.in +++ b/dev/tasks/linux-packages/apache-arrow/debian/control.in @@ -18,7 +18,6 @@ Build-Depends: libgoogle-glog-dev, @USE_SYSTEM_GRPC@ libgrpc++-dev, libgtest-dev, - libjemalloc-dev, liblz4-dev, @USE_SYSTEM_GRPC@ libprotoc-dev, @USE_SYSTEM_GRPC@ libprotobuf-dev, @@ -33,8 +32,6 @@ Build-Depends: nvidia-cuda-toolkit [!arm64], pkg-config, @USE_SYSTEM_GRPC@ protobuf-compiler-grpc, -@ENABLE_PYTHON@ python3-dev, -@ENABLE_PYTHON@ python3-numpy, valac, tzdata, zlib1g-dev @@ -42,7 +39,7 @@ Build-Depends-Indep: libglib2.0-doc Standards-Version: 3.9.8 Homepage: https://arrow.apache.org/ -Package: libarrow900 +Package: libarrow1000 Section: libs Architecture: any Multi-Arch: same @@ -54,7 +51,20 @@ Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files. -Package: libarrow-cuda900 +Package: arrow-tools +Section: utils +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libarrow1000 (= ${binary:Version}) +Description: Apache Arrow is a data processing library for analysis + . + This package provides tools. + +Package: libarrow-cuda1000 Section: libs Architecture: @CUDA_ARCHITECTURE@ Multi-Arch: same @@ -62,12 +72,12 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow900 (= ${binary:Version}) + libarrow1000 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for CUDA support. -Package: libarrow-dataset900 +Package: libarrow-dataset1000 Section: libs Architecture: any Multi-Arch: same @@ -75,13 +85,13 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow900 (= ${binary:Version}), - libparquet900 (= ${binary:Version}) + libarrow1000 (= ${binary:Version}), + libparquet1000 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Dataset module. -Package: libarrow-flight900 +Package: libarrow-flight1000 Section: libs Architecture: any Multi-Arch: same @@ -89,12 +99,12 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow900 (= ${binary:Version}) + libarrow1000 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Flight RPC system. -Package: libarrow-flight-sql900 +Package: libarrow-flight-sql1000 Section: libs Architecture: any Multi-Arch: same @@ -102,52 +112,23 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-flight900 (= ${binary:Version}) + libarrow-flight1000 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Flight SQL system. 
-@ENABLE_PYTHON@Package: libarrow-python900 -@ENABLE_PYTHON@Section: libs -@ENABLE_PYTHON@Architecture: any -@ENABLE_PYTHON@Multi-Arch: same -@ENABLE_PYTHON@Pre-Depends: ${misc:Pre-Depends} -@ENABLE_PYTHON@Depends: -@ENABLE_PYTHON@ ${misc:Depends}, -@ENABLE_PYTHON@ ${shlibs:Depends}, -@ENABLE_PYTHON@ libarrow900 (= ${binary:Version}), -@ENABLE_PYTHON@ python3, -@ENABLE_PYTHON@ python3-numpy -@ENABLE_PYTHON@Description: Apache Arrow is a data processing library for analysis -@ENABLE_PYTHON@ . -@ENABLE_PYTHON@ This package provides C++ library files for Python support. - -@ENABLE_PYTHON@Package: libarrow-python-flight900 -@ENABLE_PYTHON@Section: libs -@ENABLE_PYTHON@Architecture: any -@ENABLE_PYTHON@Multi-Arch: same -@ENABLE_PYTHON@Pre-Depends: ${misc:Pre-Depends} -@ENABLE_PYTHON@Depends: -@ENABLE_PYTHON@ ${misc:Depends}, -@ENABLE_PYTHON@ ${shlibs:Depends}, -@ENABLE_PYTHON@ libarrow-flight900 (= ${binary:Version}), -@ENABLE_PYTHON@ libarrow-python900 (= ${binary:Version}) -@ENABLE_PYTHON@Description: Apache Arrow is a data processing library for analysis -@ENABLE_PYTHON@ . -@ENABLE_PYTHON@ This package provides C++ library files for Flight and Python support. - Package: libarrow-dev Section: libdevel Architecture: any Multi-Arch: same Depends: ${misc:Depends}, - libarrow900 (= ${binary:Version}), + libarrow1000 (= ${binary:Version}), libbrotli-dev, libbz2-dev, @USE_SYSTEM_C_ARES@ libc-ares-dev, + libcurl4-openssl-dev, @USE_SYSTEM_GRPC@ libgrpc++-dev, - libjemalloc-dev, liblz4-dev, libre2-dev, libsnappy-dev, @@ -169,7 +150,7 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libarrow-cuda900 (= ${binary:Version}) + libarrow-cuda1000 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files for CUDA support. @@ -181,7 +162,7 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libarrow-dataset900 (= ${binary:Version}), + libarrow-dataset1000 (= ${binary:Version}), libparquet-dev (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . @@ -194,7 +175,7 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libarrow-flight900 (= ${binary:Version}) + libarrow-flight1000 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files for Flight RPC system. @@ -206,37 +187,12 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-flight-dev (= ${binary:Version}), - libarrow-flight-sql900 (= ${binary:Version}) + libarrow-flight-sql1000 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files for Flight SQL system. -@ENABLE_PYTHON@Package: libarrow-python-dev -@ENABLE_PYTHON@Section: libdevel -@ENABLE_PYTHON@Architecture: any -@ENABLE_PYTHON@Multi-Arch: same -@ENABLE_PYTHON@Depends: -@ENABLE_PYTHON@ ${misc:Depends}, -@ENABLE_PYTHON@ libarrow-dev (= ${binary:Version}), -@ENABLE_PYTHON@ libarrow-python900 (= ${binary:Version}) -@ENABLE_PYTHON@Description: Apache Arrow is a data processing library for analysis -@ENABLE_PYTHON@ . -@ENABLE_PYTHON@ This package provides C++ header files for Python support. 
- -@ENABLE_PYTHON@Package: libarrow-python-flight-dev -@ENABLE_PYTHON@Section: libdevel -@ENABLE_PYTHON@Architecture: any -@ENABLE_PYTHON@Multi-Arch: same -@ENABLE_PYTHON@Depends: -@ENABLE_PYTHON@ ${misc:Depends}, -@ENABLE_PYTHON@ libarrow-flight-dev (= ${binary:Version}), -@ENABLE_PYTHON@ libarrow-python-dev (= ${binary:Version}), -@ENABLE_PYTHON@ libarrow-python-flight900 (= ${binary:Version}) -@ENABLE_PYTHON@Description: Apache Arrow is a data processing library for analysis -@ENABLE_PYTHON@ . -@ENABLE_PYTHON@ This package provides C++ header files for Flight and Python support. - -Package: libgandiva900 +Package: libgandiva1000 Section: libs Architecture: any Multi-Arch: same @@ -244,7 +200,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow900 (= ${binary:Version}) + libarrow1000 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. . @@ -257,13 +213,13 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libgandiva900 (= ${binary:Version}) + libgandiva1000 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. . This package provides C++ header files. -Package: libplasma900 +Package: libplasma1000 Section: libs Architecture: @CUDA_ARCHITECTURE@ Multi-Arch: same @@ -271,7 +227,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-cuda900 (= ${binary:Version}) + libarrow-cuda1000 (= ${binary:Version}) Description: Plasma is an in-memory object store and cache for big data. . This package provides C++ library files to connect plasma-store-server. @@ -283,7 +239,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libplasma900 (= ${binary:Version}) + libplasma1000 (= ${binary:Version}) Description: Plasma is an in-memory object store and cache for big data. . This package provides plasma-store-server. @@ -295,12 +251,12 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-cuda-dev (= ${binary:Version}), - libplasma900 (= ${binary:Version}) + libplasma1000 (= ${binary:Version}) Description: Plasma is an in-memory object store and cache for big data. . This package provides C++ header files. -Package: libparquet900 +Package: libparquet1000 Section: libs Architecture: any Multi-Arch: same @@ -312,6 +268,19 @@ Description: Apache Parquet is a columnar storage format . This package provides C++ library files to process Apache Parquet format. +Package: parquet-tools +Section: utils +Architecture: any +Multi-Arch: same +Pre-Depends: ${misc:Pre-Depends} +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + libparquet1000 (= ${binary:Version}) +Description: Apache Parquet is a columnar storage format + . + This package provides tools. + Package: libparquet-dev Section: libdevel Architecture: any @@ -319,12 +288,12 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libparquet900 (= ${binary:Version}) + libparquet1000 (= ${binary:Version}) Description: Apache Parquet is a columnar storage format . This package provides C++ header files. -Package: libarrow-glib900 +Package: libarrow-glib1000 Section: libs Architecture: any Multi-Arch: same @@ -332,7 +301,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow900 (= ${binary:Version}) + libarrow1000 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . 
This package provides GLib based library files. @@ -356,7 +325,7 @@ Depends: ${misc:Depends}, libglib2.0-dev, libarrow-dev (= ${binary:Version}), - libarrow-glib900 (= ${binary:Version}), + libarrow-glib1000 (= ${binary:Version}), gir1.2-arrow-1.0 (= ${binary:Version}) Suggests: libarrow-glib-doc Description: Apache Arrow is a data processing library for analysis @@ -374,7 +343,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides documentations. -Package: libarrow-cuda-glib900 +Package: libarrow-cuda-glib1000 Section: libs Architecture: @CUDA_ARCHITECTURE@ Multi-Arch: same @@ -382,8 +351,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib900 (= ${binary:Version}), - libarrow-cuda900 (= ${binary:Version}) + libarrow-glib1000 (= ${binary:Version}), + libarrow-cuda1000 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files for CUDA support. @@ -408,13 +377,13 @@ Depends: ${misc:Depends}, libarrow-cuda-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libarrow-cuda-glib900 (= ${binary:Version}), + libarrow-cuda-glib1000 (= ${binary:Version}), gir1.2-arrow-cuda-1.0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based header files for CUDA support. -Package: libarrow-dataset-glib900 +Package: libarrow-dataset-glib1000 Section: libs Architecture: any Multi-Arch: same @@ -422,8 +391,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib900 (= ${binary:Version}), - libarrow-dataset900 (= ${binary:Version}) + libarrow-glib1000 (= ${binary:Version}), + libarrow-dataset1000 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files for dataset module. @@ -448,7 +417,7 @@ Depends: ${misc:Depends}, libarrow-dataset-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libarrow-dataset-glib900 (= ${binary:Version}), + libarrow-dataset-glib1000 (= ${binary:Version}), gir1.2-arrow-dataset-1.0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . @@ -465,7 +434,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides documentations for dataset module. -Package: libarrow-flight-glib900 +Package: libarrow-flight-glib1000 Section: libs Architecture: any Multi-Arch: same @@ -473,8 +442,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib900 (= ${binary:Version}), - libarrow-flight900 (= ${binary:Version}) + libarrow-glib1000 (= ${binary:Version}), + libarrow-flight1000 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files for Apache Arrow Flight. @@ -500,7 +469,7 @@ Depends: ${misc:Depends}, libarrow-flight-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libarrow-flight-glib900 (= ${binary:Version}), + libarrow-flight-glib1000 (= ${binary:Version}), gir1.2-arrow-flight-1.0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . @@ -517,7 +486,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides documentations for Apache Arrow Flight. 
-Package: libarrow-flight-sql-glib900 +Package: libarrow-flight-sql-glib1000 Section: libs Architecture: any Multi-Arch: same @@ -525,8 +494,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-flight-glib900 (= ${binary:Version}), - libarrow-flight-sql900 (= ${binary:Version}) + libarrow-flight-glib1000 (= ${binary:Version}), + libarrow-flight-sql1000 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files for Apache Arrow Flight SQL. @@ -552,7 +521,7 @@ Depends: ${misc:Depends}, libarrow-flight-sql-dev (= ${binary:Version}), libarrow-flight-glib-dev (= ${binary:Version}), - libarrow-flight-sql-glib900 (= ${binary:Version}), + libarrow-flight-sql-glib1000 (= ${binary:Version}), gir1.2-arrow-flight-sql-1.0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . @@ -569,7 +538,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides documentations for Apache Arrow Flight SQL. -Package: libgandiva-glib900 +Package: libgandiva-glib1000 Section: libs Architecture: any Multi-Arch: same @@ -577,8 +546,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib900 (= ${binary:Version}), - libgandiva900 (= ${binary:Version}) + libarrow-glib1000 (= ${binary:Version}), + libgandiva1000 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. . @@ -605,7 +574,7 @@ Depends: ${misc:Depends}, libgandiva-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libgandiva-glib900 (= ${binary:Version}), + libgandiva-glib1000 (= ${binary:Version}), gir1.2-gandiva-1.0 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. @@ -624,7 +593,7 @@ Description: Gandiva is a toolset for compiling and evaluating expressions . This package provides documentations. -Package: libplasma-glib900 +Package: libplasma-glib1000 Section: libs Architecture: @CUDA_ARCHITECTURE@ Multi-Arch: same @@ -632,8 +601,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-cuda-glib900 (= ${binary:Version}), - libplasma900 (= ${binary:Version}) + libarrow-cuda-glib1000 (= ${binary:Version}), + libplasma1000 (= ${binary:Version}) Description: Plasma is an in-memory object store and cache for big data. . This package provides GLib based library files to connect plasma-store-server. @@ -658,7 +627,7 @@ Depends: ${misc:Depends}, libplasma-dev (= ${binary:Version}), libarrow-cuda-glib-dev (= ${binary:Version}), - libplasma-glib900 (= ${binary:Version}), + libplasma-glib1000 (= ${binary:Version}), gir1.2-plasma-1.0 (= ${binary:Version}) Description: Plasma is an in-memory object store and cache for big data. . @@ -675,7 +644,7 @@ Description: Plasma is an in-memory object store and cache for big data. . This package provides documentations. -Package: libparquet-glib900 +Package: libparquet-glib1000 Section: libs Architecture: any Multi-Arch: same @@ -683,8 +652,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib900 (= ${binary:Version}), - libparquet900 (= ${binary:Version}) + libarrow-glib1000 (= ${binary:Version}), + libparquet1000 (= ${binary:Version}) Description: Apache Parquet is a columnar storage format . This package provides GLib based library files. 
@@ -709,7 +678,7 @@ Depends: ${misc:Depends}, libarrow-glib-dev (= ${binary:Version}), libparquet-dev (= ${binary:Version}), - libparquet-glib900 (= ${binary:Version}), + libparquet-glib1000 (= ${binary:Version}), gir1.2-parquet-1.0 (= ${binary:Version}) Suggests: libparquet-glib-doc Description: Apache Parquet is a columnar storage format diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib900.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib1000.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib900.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib1000.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda900.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda1000.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda900.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda1000.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib900.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib1000.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib900.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib1000.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset900.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset1000.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset900.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset1000.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install index b55c76d69289b..40a834ced42f1 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install +++ b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install @@ -4,7 +4,6 @@ usr/lib/*/cmake/arrow/ArrowOptions.cmake usr/lib/*/cmake/arrow/ArrowTargets*.cmake usr/lib/*/cmake/arrow/Find*Alt.cmake usr/lib/*/cmake/arrow/FindArrow.cmake -usr/lib/*/cmake/arrow/Findjemalloc.cmake usr/lib/*/cmake/arrow/Find[BTuz]*.cmake usr/lib/*/cmake/arrow/arrow-config.cmake usr/lib/*/libarrow.a diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib900.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib1000.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib900.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-glib1000.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib900.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib1000.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib900.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql-glib1000.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql900.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql1000.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql900.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-sql1000.install diff --git 
a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight900.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight1000.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight900.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight1000.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib900.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib1000.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib900.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib1000.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install deleted file mode 100644 index 807583f9845e6..0000000000000 --- a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install +++ /dev/null @@ -1,6 +0,0 @@ -usr/lib/*/cmake/arrow/ArrowPythonConfig*.cmake -usr/lib/*/cmake/arrow/ArrowPythonTargets*.cmake -usr/lib/*/cmake/arrow/FindArrowPython.cmake -usr/lib/*/libarrow_python.a -usr/lib/*/libarrow_python.so -usr/lib/*/pkgconfig/arrow-python.pc diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install deleted file mode 100644 index 6cf96e227e946..0000000000000 --- a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install +++ /dev/null @@ -1,6 +0,0 @@ -usr/lib/*/cmake/arrow/ArrowPythonFlightConfig*.cmake -usr/lib/*/cmake/arrow/ArrowPythonFlightTargets*.cmake -usr/lib/*/cmake/arrow/FindArrowPythonFlight.cmake -usr/lib/*/libarrow_python_flight.a -usr/lib/*/libarrow_python_flight.so -usr/lib/*/pkgconfig/arrow-python-flight.pc diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight900.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight900.install deleted file mode 100644 index b7cbfec1f0501..0000000000000 --- a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight900.install +++ /dev/null @@ -1 +0,0 @@ -usr/lib/*/libarrow_python_flight.so.* diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python900.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python900.install deleted file mode 100644 index eef3e66483739..0000000000000 --- a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python900.install +++ /dev/null @@ -1 +0,0 @@ -usr/lib/*/libarrow_python.so.* diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow900.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow1000.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libarrow900.install rename to dev/tasks/linux-packages/apache-arrow/debian/libarrow1000.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib900.install b/dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib1000.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib900.install rename to dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib1000.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libgandiva900.install b/dev/tasks/linux-packages/apache-arrow/debian/libgandiva1000.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libgandiva900.install rename to 
dev/tasks/linux-packages/apache-arrow/debian/libgandiva1000.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib900.install b/dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib1000.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib900.install rename to dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib1000.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libparquet900.install b/dev/tasks/linux-packages/apache-arrow/debian/libparquet1000.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libparquet900.install rename to dev/tasks/linux-packages/apache-arrow/debian/libparquet1000.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib900.install b/dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib1000.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib900.install rename to dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib1000.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libplasma900.install b/dev/tasks/linux-packages/apache-arrow/debian/libplasma1000.install similarity index 100% rename from dev/tasks/linux-packages/apache-arrow/debian/libplasma900.install rename to dev/tasks/linux-packages/apache-arrow/debian/libplasma1000.install diff --git a/dev/tasks/linux-packages/apache-arrow/debian/rules b/dev/tasks/linux-packages/apache-arrow/debian/rules index 9178111ebaf0c..b9664c15a7cdb 100755 --- a/dev/tasks/linux-packages/apache-arrow/debian/rules +++ b/dev/tasks/linux-packages/apache-arrow/debian/rules @@ -21,16 +21,12 @@ override_dh_auto_configure: ARROW_CUDA=OFF; \ ARROW_PLASMA=OFF; \ fi; \ - if python3 -c 'import numpy' > /dev/null 2>&1; then \ - ARROW_PYTHON=ON; \ - else \ - ARROW_PYTHON=OFF; \ - fi; \ dh_auto_configure \ --sourcedirectory=cpp \ --builddirectory=cpp_build \ --buildsystem=cmake+ninja \ -- \ + -DARROW_BUILD_UTILITIES=ON \ -DARROW_COMPUTE=ON \ -DARROW_CSV=ON \ -DARROW_CUDA=$${ARROW_CUDA} \ @@ -48,7 +44,6 @@ override_dh_auto_configure: -DARROW_PACKAGE_KIND=deb \ -DARROW_PARQUET=ON \ -DARROW_PLASMA=$${ARROW_PLASMA} \ - -DARROW_PYTHON=$${ARROW_PYTHON} \ -DARROW_S3=ON \ -DARROW_USE_CCACHE=OFF \ -DARROW_WITH_BROTLI=ON \ @@ -60,10 +55,8 @@ override_dh_auto_configure: -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ -DCMAKE_UNITY_BUILD=ON \ -DCUDAToolkit_ROOT=/usr \ - -DPARQUET_REQUIRE_ENCRYPTION=ON \ - -DPythonInterp_FIND_VERSION=ON \ - -DPythonInterp_FIND_VERSION_MAJOR=3 \ - -Djemalloc_SOURCE=SYSTEM + -DPARQUET_BUILD_EXECUTABLES=ON \ + -DPARQUET_REQUIRE_ENCRYPTION=ON override_dh_auto_build: dh_auto_build \ diff --git a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile index 0a37bab4b12cd..5a5fd903bfc36 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile @@ -40,7 +40,6 @@ RUN \ glog-devel \ gobject-introspection-devel \ gtk-doc \ - jemalloc-devel \ json-devel \ libarchive \ libzstd-devel \ @@ -53,8 +52,6 @@ RUN \ openssl-devel \ pkg-config \ python39 \ - python39-devel \ - python39-numpy \ python39-pip \ re2-devel \ # rapidjson-devel \ diff --git a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-9/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-9/Dockerfile index 8561779f658ad..dde7930ff5b7f 100644 --- 
a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-9/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-9/Dockerfile @@ -39,7 +39,6 @@ RUN \ git \ gobject-introspection-devel \ gtk-doc \ - jemalloc-devel \ json-devel \ libarchive \ libzstd-devel \ @@ -52,8 +51,6 @@ RUN \ openssl-devel \ pkg-config \ python3 \ - python3-devel \ - python3-numpy \ python3-pip \ re2-devel \ rapidjson-devel \ diff --git a/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile index fa1ded66bcea0..68d604d64ed03 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/yum/amazon-linux-2/Dockerfile @@ -37,7 +37,6 @@ RUN \ glog-devel \ gobject-introspection-devel \ gtk-doc \ - jemalloc-devel \ json-devel \ lz4-devel \ make \ diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index 78f974f97111e..872f461fa4a8d 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -64,7 +64,6 @@ %define use_gflags (!%{is_amazon_linux}) %define use_glog (%{rhel} <= 8) %define use_mimalloc (%{rhel} >= 8) -%define use_python (%{rhel} >= 8) # TODO: Enable this. This works on local but is fragile on GitHub Actions and # Travis CI. # %%define use_s3 (%%{rhel} >= 8) @@ -105,7 +104,6 @@ BuildRequires: git %if %{use_glog} BuildRequires: glog-devel %endif -BuildRequires: jemalloc-devel %if %{use_gcs} BuildRequires: json-devel %endif @@ -116,10 +114,6 @@ BuildRequires: lz4-devel %{lz4_requirement} BuildRequires: ninja-build BuildRequires: openssl-devel BuildRequires: pkgconfig -%if %{use_python} -BuildRequires: python%{python_version}-devel -BuildRequires: python%{python_version}-numpy -%endif %if %{have_rapidjson} BuildRequires: rapidjson-devel %endif @@ -154,6 +148,7 @@ Apache Arrow is a data processing library for analysis. cpp_build_type=release cd cpp %arrow_cmake \ + -DARROW_BUILD_UTILITIES=ON \ -DARROW_CSV=ON \ -DARROW_DATASET=ON \ %if %{use_flight} @@ -175,9 +170,6 @@ cd cpp -DARROW_PACKAGE_KIND=rpm \ -DARROW_PARQUET=ON \ -DARROW_PLASMA=ON \ -%if %{use_python} - -DARROW_PYTHON=ON \ -%endif %if %{use_s3} -DARROW_S3=ON \ %endif @@ -189,12 +181,8 @@ cd cpp -DARROW_WITH_ZLIB=ON \ -DARROW_WITH_ZSTD=ON \ -DCMAKE_BUILD_TYPE=$cpp_build_type \ + -DPARQUET_BUILD_EXECUTABLES=ON \ -DPARQUET_REQUIRE_ENCRYPTION=ON \ -%if %{use_python} - -DPythonInterp_FIND_VERSION=ON \ - -DPythonInterp_FIND_VERSION_MAJOR=3 \ -%endif - -Djemalloc_SOURCE=SYSTEM \ -G"Unix Makefiles" %arrow_cmake_build cd - @@ -243,7 +231,6 @@ Requires: gflags %if %{use_glog} Requires: glog %endif -Requires: jemalloc %if %{have_zstd} Requires: libzstd %endif @@ -266,6 +253,20 @@ This package contains the libraries for Apache Arrow C++. %license LICENSE.txt NOTICE.txt %{_libdir}/libarrow.so.* +%package tools +Summary: Tools for Apache Arrow C++ +License: Apache-2.0 +Requires: %{name}%{major_version}-libs = %{version}-%{release} + +%description tools +Tools for Apache Arrow C++. 
+ +%files tools +%defattr(-,root,root,-) +%doc README.md +%license LICENSE.txt NOTICE.txt +%{_bindir}/arrow-* + %package devel Summary: Libraries and header files for Apache Arrow C++ License: Apache-2.0 @@ -275,7 +276,7 @@ Requires: bzip2-devel %if %{use_flight} Requires: c-ares-devel %endif -Requires: jemalloc-devel +Requires: curl-devel %if %{use_gcs} Requires: json-devel %endif @@ -323,7 +324,6 @@ Libraries and header files for Apache Arrow C++. %if %{use_flight} %{_libdir}/cmake/arrow/Findc-aresAlt.cmake %endif -%{_libdir}/cmake/arrow/Findjemalloc.cmake %{_libdir}/cmake/arrow/Findlz4Alt.cmake %if %{have_re2} %{_libdir}/cmake/arrow/Findre2Alt.cmake @@ -494,86 +494,6 @@ Libraries and header files for Gandiva. %{_libdir}/pkgconfig/gandiva.pc %endif -%if %{use_python} -%package -n %{name}%{major_version}-python-libs -Summary: Python integration library for Apache Arrow -License: Apache-2.0 -Requires: %{name}%{major_version}-libs = %{version}-%{release} -Requires: python%{python_version}-numpy - -%description -n %{name}%{major_version}-python-libs -This package contains the Python integration library for Apache Arrow. - -%files -n %{name}%{major_version}-python-libs -%defattr(-,root,root,-) -%doc README.md -%license LICENSE.txt NOTICE.txt -%{_libdir}/libarrow_python.so.* - -%package python-devel -Summary: Libraries and header files for Python integration library for Apache Arrow -License: Apache-2.0 -Requires: %{name}%{major_version}-python-libs = %{version}-%{release} -Requires: %{name}-devel = %{version}-%{release} -Requires: python%{python_version}-devel - -%description python-devel -Libraries and header files for Python integration library for Apache Arrow. - -%files python-devel -%defattr(-,root,root,-) -%doc README.md -%license LICENSE.txt NOTICE.txt -%{_includedir}/arrow/python/ -%exclude %{_includedir}/arrow/python/flight.h -%{_libdir}/cmake/arrow/ArrowPythonConfig*.cmake -%{_libdir}/cmake/arrow/ArrowPythonTargets*.cmake -%{_libdir}/cmake/arrow/FindArrowPython.cmake -%{_libdir}/libarrow_python.a -%{_libdir}/libarrow_python.so -%{_libdir}/pkgconfig/arrow-python.pc - -%if %{use_flight} -%package -n %{name}%{major_version}-python-flight-libs -Summary: Python integration library for Apache Arrow Flight -License: Apache-2.0 -Requires: %{name}%{major_version}-flight-libs = %{version}-%{release} -Requires: %{name}%{major_version}-python-libs = %{version}-%{release} - -%description -n %{name}%{major_version}-python-flight-libs -This package contains the Python integration library for Apache Arrow Flight. - -%files -n %{name}%{major_version}-python-flight-libs -%defattr(-,root,root,-) -%doc README.md -%license LICENSE.txt NOTICE.txt -%{_libdir}/libarrow_python_flight.so.* - -%package python-flight-devel -Summary: Libraries and header files for Python integration library for Apache Arrow Flight. -License: Apache-2.0 -Requires: %{name}%{major_version}-python-flight-libs = %{version}-%{release} -Requires: %{name}-flight-devel = %{version}-%{release} -Requires: %{name}-python-devel = %{version}-%{release} - -%description python-flight-devel -Libraries and header files for Python integration library for -Apache Arrow Flight. 
- -%files python-flight-devel -%defattr(-,root,root,-) -%doc README.md -%license LICENSE.txt NOTICE.txt -%{_includedir}/arrow/python/flight.h -%{_libdir}/cmake/arrow/ArrowPythonFlightConfig*.cmake -%{_libdir}/cmake/arrow/ArrowPythonFlightTargets*.cmake -%{_libdir}/cmake/arrow/FindArrowPythonFlight.cmake -%{_libdir}/libarrow_python_flight.a -%{_libdir}/libarrow_python_flight.so -%{_libdir}/pkgconfig/arrow-python-flight.pc -%endif -%endif - %package -n plasma%{major_version}-libs Summary: Runtime libraries for Plasma in-memory object store License: Apache-2.0 @@ -638,6 +558,20 @@ This package contains the libraries for Apache Parquet C++. %license LICENSE.txt NOTICE.txt %{_libdir}/libparquet.so.* +%package -n parquet-tools +Summary: Tools for Apache Parquet C++ +License: Apache-2.0 +Requires: parquet%{major_version}-libs = %{version}-%{release} + +%description -n parquet-tools +Tools for Apache Parquet C++. + +%files -n parquet-tools +%defattr(-,root,root,-) +%doc README.md +%license LICENSE.txt NOTICE.txt +%{_bindir}/parquet-* + %package -n parquet-devel Summary: Libraries and header files for Apache Parquet C++ License: Apache-2.0 @@ -673,8 +607,8 @@ This package contains the libraries for Apache Arrow GLib. %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt +%{_libdir}/girepository-1.0/Arrow-*.typelib %{_libdir}/libarrow-glib.so.* -%{_datadir}/gir-1.0/Arrow-1.0.gir %package glib-devel Summary: Libraries and header files for Apache Arrow GLib @@ -691,14 +625,14 @@ Libraries and header files for Apache Arrow GLib. %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt +%{_datadir}/arrow-glib/example/ +%{_datadir}/gir-1.0/Arrow-*.gir +%{_datadir}/vala/vapi/arrow-glib.* %{_includedir}/arrow-glib/ %{_libdir}/libarrow-glib.a %{_libdir}/libarrow-glib.so %{_libdir}/pkgconfig/arrow-glib.pc %{_libdir}/pkgconfig/arrow-orc-glib.pc -%{_libdir}/girepository-1.0/Arrow-1.0.typelib -%{_datadir}/arrow-glib/example/ -%{_datadir}/vala/vapi/arrow-glib.* %package glib-doc Summary: Documentation for Apache Arrow GLib @@ -727,8 +661,8 @@ This package contains the libraries for Apache Arrow Dataset GLib. %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt +%{_libdir}/girepository-1.0/ArrowDataset-*.typelib %{_libdir}/libarrow-dataset-glib.so.* -%{_datadir}/gir-1.0/ArrowDataset-1.0.gir %package dataset-glib-devel Summary: Libraries and header files for Apache Arrow Dataset GLib @@ -744,12 +678,12 @@ Libraries and header files for Apache Arrow Dataset GLib. %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt +%{_datadir}/gir-1.0/ArrowDataset-*.gir +%{_datadir}/vala/vapi/arrow-dataset-glib.* %{_includedir}/arrow-dataset-glib/ %{_libdir}/libarrow-dataset-glib.a %{_libdir}/libarrow-dataset-glib.so %{_libdir}/pkgconfig/arrow-dataset-glib.pc -%{_libdir}/girepository-1.0/ArrowDataset-1.0.typelib -%{_datadir}/vala/vapi/arrow-dataset-glib.* %package dataset-glib-doc Summary: Documentation for Apache Arrow Dataset GLib @@ -778,8 +712,8 @@ This package contains the libraries for Apache Arrow Flight GLib. %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt +%{_libdir}/girepository-1.0/ArrowFlight-*.typelib %{_libdir}/libarrow-flight-glib.so.* -%{_datadir}/gir-1.0/ArrowFlight-1.0.gir %package flight-glib-devel Summary: Libraries and header files for Apache Arrow Flight GLib @@ -795,12 +729,12 @@ Libraries and header files for Apache Arrow Flight GLib. 
%defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt +%{_datadir}/gir-1.0/ArrowFlight-*.gir +%{_datadir}/vala/vapi/arrow-flight-glib.* %{_includedir}/arrow-flight-glib/ %{_libdir}/libarrow-flight-glib.a %{_libdir}/libarrow-flight-glib.so %{_libdir}/pkgconfig/arrow-flight-glib.pc -%{_libdir}/girepository-1.0/ArrowFlight-1.0.typelib -%{_datadir}/vala/vapi/arrow-flight-glib.* %package flight-glib-doc Summary: Documentation for Apache Arrow Flight GLib @@ -828,8 +762,8 @@ This package contains the libraries for Apache Arrow Flight SQL GLib. %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt +%{_libdir}/girepository-1.0/ArrowFlightSQL-*.typelib %{_libdir}/libarrow-flight-sql-glib.so.* -%{_datadir}/gir-1.0/ArrowFlightSQL-1.0.gir %package flight-sql-glib-devel Summary: Libraries and header files for Apache Arrow Flight SQL GLib @@ -845,12 +779,12 @@ Libraries and header files for Apache Arrow Flight SQL GLib. %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt +%{_datadir}/gir-1.0/ArrowFlightSQL-*.gir +%{_datadir}/vala/vapi/arrow-flight-sql-glib.* %{_includedir}/arrow-flight-sql-glib/ %{_libdir}/libarrow-flight-sql-glib.a %{_libdir}/libarrow-flight-sql-glib.so %{_libdir}/pkgconfig/arrow-flight-sql-glib.pc -%{_libdir}/girepository-1.0/ArrowFlightSQL-1.0.typelib -%{_datadir}/vala/vapi/arrow-flight-sql-glib.* %package flight-sql-glib-doc Summary: Documentation for Apache Arrow Flight SQL GLib @@ -880,8 +814,8 @@ This package contains the libraries for Gandiva GLib. %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt +%{_libdir}/girepository-1.0/Gandiva-*.typelib %{_libdir}/libgandiva-glib.so.* -%{_datadir}/gir-1.0/Gandiva-1.0.gir %package -n gandiva-glib-devel Summary: Libraries and header files for Gandiva GLib @@ -897,12 +831,12 @@ Libraries and header files for Gandiva GLib. %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt +%{_datadir}/gir-1.0/Gandiva-*.gir +%{_datadir}/vala/vapi/gandiva-glib.* %{_includedir}/gandiva-glib/ %{_libdir}/libgandiva-glib.a %{_libdir}/libgandiva-glib.so %{_libdir}/pkgconfig/gandiva-glib.pc -%{_libdir}/girepository-1.0/Gandiva-1.0.typelib -%{_datadir}/vala/vapi/gandiva-glib.* %package -n gandiva-glib-doc Summary: Documentation for Gandiva GLib @@ -931,8 +865,8 @@ This package contains the libraries for Plasma GLib. %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt +%{_libdir}/girepository-1.0/Plasma-*.typelib %{_libdir}/libplasma-glib.so.* -%{_datadir}/gir-1.0/Plasma-1.0.gir %package -n plasma-glib-devel Summary: Libraries and header files for Plasma GLib @@ -948,12 +882,12 @@ Libraries and header files for Plasma GLib. %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt +%{_datadir}/gir-1.0/Plasma-*gir +%{_datadir}/vala/vapi/plasma-glib.* %{_includedir}/plasma-glib/ %{_libdir}/libplasma-glib.a %{_libdir}/libplasma-glib.so %{_libdir}/pkgconfig/plasma-glib.pc -%{_libdir}/girepository-1.0/Plasma-1.0.typelib -%{_datadir}/vala/vapi/plasma-glib.* %package -n plasma-glib-doc Summary: Documentation for Plasma GLib @@ -981,8 +915,8 @@ This package contains the libraries for Apache Parquet GLib. %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt +%{_libdir}/girepository-1.0/Parquet-*.typelib %{_libdir}/libparquet-glib.so.* -%{_datadir}/gir-1.0/Parquet-1.0.gir %package -n parquet-glib-devel Summary: Libraries and header files for Apache Parquet GLib @@ -998,12 +932,12 @@ Libraries and header files for Apache Parquet GLib. 
%defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt +%{_datadir}/gir-1.0/Parquet-*.gir +%{_datadir}/vala/vapi/parquet-glib.* %{_includedir}/parquet-glib/ %{_libdir}/libparquet-glib.a %{_libdir}/libparquet-glib.so %{_libdir}/pkgconfig/parquet-glib.pc -%{_libdir}/girepository-1.0/Parquet-1.0.typelib -%{_datadir}/vala/vapi/parquet-glib.* %package -n parquet-glib-doc Summary: Documentation for Apache Parquet GLib @@ -1019,6 +953,9 @@ Documentation for Apache Parquet GLib. %{_datadir}/gtk-doc/html/parquet-glib/ %changelog +* Fri Jul 29 2022 Krisztián Szűcs - 9.0.0-1 +- New upstream release. + * Tue May 03 2022 Krisztián Szűcs - 8.0.0-1 - New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile index 93d4a7812b11a..04e74012f9521 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile @@ -39,7 +39,6 @@ RUN \ glog-devel \ gobject-introspection-devel \ gtk-doc \ - jemalloc-devel \ json-devel \ libzstd-devel \ lz4-devel \ @@ -48,8 +47,6 @@ RUN \ openssl-devel \ pkg-config \ python36 \ - python36-devel \ - python36-numpy \ rapidjson-devel \ rpmdevtools \ snappy-devel \ diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-8-stream/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/centos-8-stream/Dockerfile index ca9e44d409d48..5dba632628a77 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/centos-8-stream/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-8-stream/Dockerfile @@ -40,7 +40,6 @@ RUN \ glog-devel \ gobject-introspection-devel \ gtk-doc \ - jemalloc-devel \ json-devel \ libarchive \ libzstd-devel \ @@ -53,8 +52,6 @@ RUN \ openssl-devel \ pkg-config \ python39 \ - python39-devel \ - python39-numpy \ python39-pip \ re2-devel \ rapidjson-devel \ diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-9-stream/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/centos-9-stream/Dockerfile index 2bc036432cd1f..6f40cd71e9f0d 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/centos-9-stream/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-9-stream/Dockerfile @@ -18,12 +18,15 @@ ARG FROM=quay.io/centos/centos:stream9 FROM ${FROM} +ENV SCL=gcc-toolset-12 + ARG DEBUG RUN \ quiet=$([ "${DEBUG}" = "yes" ] || echo "--quiet") && \ dnf install -y ${quiet} epel-release && \ dnf install --enablerepo=crb -y ${quiet} \ + ${SCL} \ bison \ boost-devel \ brotli-devel \ @@ -39,7 +42,6 @@ RUN \ git \ gobject-introspection-devel \ gtk-doc \ - jemalloc-devel \ json-devel \ libarchive \ libzstd-devel \ @@ -52,8 +54,6 @@ RUN \ openssl-devel \ pkg-config \ python3 \ - python3-devel \ - python3-numpy \ python3-pip \ re2-devel \ rapidjson-devel \ diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja index c7f7c647b7b5d..3e87d507e77e1 100644 --- a/dev/tasks/macros.jinja +++ b/dev/tasks/macros.jinja @@ -69,25 +69,43 @@ on: uses: actions/setup-python@v4 with: python-version: 3.8 + - name: Checkout Crossbow + uses: actions/checkout@v3 + with: + path: crossbow + ref: {{ job.branch }} - name: Setup Crossbow shell: bash - run: pip install -e arrow/dev/archery[crossbow-upload] + run: pip install -e arrow/dev/archery[crossbow] - name: Upload artifacts shell: bash run: | archery crossbow \ - --queue-path $(pwd) \ - --queue-remote {{ queue_remote_url }} \ - upload-artifacts \ - --sha {{ task.branch }} \ - --tag {{ task.tag }} \ - {% if pattern is string 
%} - "{{ pattern }}" - {% elif pattern is iterable %} - {% for p in pattern %} - "{{ p }}" {{ "\\" if not loop.last else "" }} - {% endfor %} - {% endif %} + --queue-path $(pwd)/crossbow \ + --queue-remote {{ queue_remote_url }} \ + upload-artifacts \ + --sha {{ task.branch }} \ + --tag {{ task.tag }} \ + {% if pattern is string %} + "{{ pattern }}" + {% elif pattern is iterable %} + {% for p in pattern %} + "{{ p }}" {{ "\\" if not loop.last else "" }} + {% endfor %} + {% endif %} + env: + CROSSBOW_GITHUB_TOKEN: {{ '${{ secrets.CROSSBOW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}' }} + - name: Verify uploaded artifacts + shell: bash + run: | + archery crossbow \ + --queue-path $(pwd)/crossbow \ + --queue-remote {{ queue_remote_url }} \ + status \ + --task-filter '{{ task.name }}' \ + --no-fetch \ + --validate \ + {{ job.branch }} env: CROSSBOW_GITHUB_TOKEN: {{ '${{ secrets.CROSSBOW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}' }} {% endmacro %} @@ -170,7 +188,7 @@ on: {%- macro travis_upload_releases(pattern) -%} - sudo -H pip3 install pygit2==1.0 cryptography==36 - - sudo -H pip3 install -e arrow/dev/archery[crossbow-upload] + - sudo -H pip3 install -e arrow/dev/archery[crossbow] - | archery crossbow \ --queue-path $(pwd) \ @@ -185,6 +203,16 @@ on: "{{ p }}" {{ "\\" if not loop.last else "" }} {% endfor %} {% endif %} + - git fetch origin {{ job.branch }}:remotes/origin/{{ job.branch }} + - | + archery crossbow \ + --queue-path $(pwd) \ + --queue-remote {{ queue_remote_url }} \ + status \ + --task-filter '{{ task.name }}' \ + --no-fetch \ + --validate \ + {{ job.branch }} {% endmacro %} {%- macro travis_upload_gemfury(pattern) -%} @@ -226,6 +254,21 @@ on: done {% endmacro %} +{%- macro pin_brew_formulae(is_fork = false) -%} +{#- This macro expects the cwd to be arrow/r -#} + # Copy all *brew formulae + cp ../dev/tasks/homebrew-formulae/autobrew/apache-arrow*.rb tools/ + + # Pin the git commit in the formula to match + cd tools + if [ "{{ is_fork }}" == "true" ]; then + sed -i.bak -E -e 's/apache\/arrow.git"$/{{ arrow.github_repo.split("/") | join("\/") }}.git", :revision => "'"{{ arrow.head }}"'"/' apache-arrow*.rb + else + sed -i.bak -E -e 's/arrow.git"$/arrow.git", :revision => "'"{{ arrow.head }}"'"/' apache-arrow*.rb + fi + rm -f apache-arrow*.rb.bak +{% endmacro %} + {%- macro github_change_r_pkg_version(is_fork, version = '\\2.\'\"$(date +%Y%m%d)\"\'' ) -%} - name: Modify version shell: bash @@ -236,16 +279,8 @@ on: DESCRIPTION head DESCRIPTION rm -f DESCRIPTION.bak - cp ../dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb tools/apache-arrow.rb - # Pin the git commit in the formula to match - cd tools - if [ "{{ is_fork }}" == "true" ]; then - sed -i.bak -E -e 's/apache\/arrow.git"$/{{ arrow.github_repo.split("/") | join("\/") }}.git", :revision => "'"{{ arrow.head }}"'"/' apache-arrow.rb - else - sed -i.bak -E -e 's/arrow.git"$/arrow.git", :revision => "'"{{ arrow.head }}"'"/' apache-arrow.rb - fi - rm -f apache-arrow.rb.bak + {{ pin_brew_formulae(is_fork) }} {% endmacro %} {%- macro github_test_r_src_pkg() -%} @@ -309,3 +344,20 @@ on: # Set envvar for later steps by appending to $GITHUB_ENV write(paste0("R_PROFILE_USER=", profile_path), file = Sys.getenv("GITHUB_ENV"), append = TRUE) {% endmacro %} + +{# Detect if we are using a fork or the upstream repo #} + {% set is_upstream_b = arrow.github_repo == 'apache/arrow' %} + {# use filter to cast to string and convert to lowercase to match yaml boolean #} + {% set is_fork = (not is_upstream_b)|lower %} + +{% set r_release = '4.2' %} +{% 
set r_oldrel = '4.1' %} + +{%- macro github_set_env(env) -%} + {% if env is defined %} + env: + {% for key, value in env.items() %} + {{ key }}: "{{ value }}" + {% endfor %} + {% endif %} +{%- endmacro -%} diff --git a/dev/tasks/python-minimal-build/github.linux.yml b/dev/tasks/python-minimal-build/github.linux.yml index 887197d17bf04..e776312b93f95 100644 --- a/dev/tasks/python-minimal-build/github.linux.yml +++ b/dev/tasks/python-minimal-build/github.linux.yml @@ -24,12 +24,7 @@ jobs: name: | Docker Python Minimal Build {{ flags|default("") }} {{ image }} {{ command|default("") }} runs-on: ubuntu-latest - {% if env is defined %} - env: - {% for key, value in env.items() %} - {{ key }}: "{{ value }}" - {% endfor %} - {% endif %} +{{ macros.github_set_env(env) }} steps: {{ macros.github_checkout_arrow(submodules=false)|indent }} diff --git a/dev/tasks/python-wheels/github.osx.amd64.yml b/dev/tasks/python-wheels/github.osx.amd64.yml index f83a4c1bf5298..981ce7f50f734 100644 --- a/dev/tasks/python-wheels/github.osx.amd64.yml +++ b/dev/tasks/python-wheels/github.osx.amd64.yml @@ -34,8 +34,8 @@ env: jobs: build: - name: Build wheel for OS X - runs-on: macos-10.15 + name: Build wheel for macOS + runs-on: macos-latest env: VCPKG_BINARY_SOURCES: 'clear;nuget,GitHub,readwrite' steps: @@ -65,9 +65,9 @@ jobs: - name: Setup NuGet Credentials shell: bash - env: + env: GITHUB_TOKEN: {{ '${{ secrets.GITHUB_TOKEN }}' }} - run: | + run: | mono `vcpkg fetch nuget | tail -n 1` \ sources add \ -source "https://nuget.pkg.github.com/$GITHUB_REPOSITORY_OWNER/index.json" \ @@ -75,11 +75,11 @@ jobs: -name "GitHub" \ -username "$GITHUB_REPOSITORY_OWNER" \ -password "$GITHUB_TOKEN" \ - + mono `vcpkg fetch nuget | tail -n 1` \ setapikey "$GITHUB_TOKEN" \ -source "https://nuget.pkg.github.com/$GITHUB_REPOSITORY_OWNER/index.json" - + - name: Install Packages run: | if [ "${ARROW_S3}" == "ON" ]; then diff --git a/dev/tasks/r/github.macos.autobrew.yml b/dev/tasks/r/github.macos.autobrew.yml index c6c56277de92a..1633682a430b4 100644 --- a/dev/tasks/r/github.macos.autobrew.yml +++ b/dev/tasks/r/github.macos.autobrew.yml @@ -16,44 +16,60 @@ # under the License. {% import 'macros.jinja' as macros with context %} +{% set is_fork = macros.is_fork %} {{ macros.github_header() }} jobs: autobrew: - name: "Autobrew" - runs-on: macOS-10.15 + name: "Autobrew {{ "${{ matrix.platform }}" }}" + runs-on: {{ "${{ matrix.platform }}" }} + strategy: + fail-fast: false + matrix: + platform: + - macos-11 + - macos-10.13 # self-hosted + r-version: + - {{ macros.r_release }} + - {{ macros.r_oldrel }} steps: {{ macros.github_checkout_arrow()|indent }} - - name: Configure autobrew script run: | + # minio is pre-installed on the self-hosted 10.13 runer + if [ {{ '${{ matrix.platform }}' }} != macos-10.13 ]; then + # install minio for tests + brew install minio + fi cd arrow/r - # Put the formula inside r/ so that it's included in the package build - cp ../dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb tools/apache-arrow.rb - # Pin the current commit in the formula to test so that we're not always pulling from master - sed -i.bak -E -e 's@https://github.com/apache/arrow.git"$@{{ arrow.remote }}.git", :revision => "{{ arrow.head }}"@' tools/apache-arrow.rb && rm -f tools/apache-arrow.rb.bak - # Sometimes crossbow gives a remote URL with .git and sometimes not. 
Make sure there's only one - sed -i.bak -E -e 's@.git.git@.git@' tools/apache-arrow.rb && rm -f tools/apache-arrow.rb.bak - # Get minio for S3 testing - brew install minio - - uses: r-lib/actions/setup-r@v1 - - name: Install dependencies - run: | - install.packages("remotes") - remotes::install_deps("arrow/r", dependencies = TRUE) - remotes::install_cran(c("rcmdcheck", "sys", "sessioninfo")) - shell: Rscript {0} - - name: Session info + {{ macros.pin_brew_formulae(is_fork)|indent }} + - uses: r-lib/actions/setup-r@v2 + if: matrix.platform != 'macos-10.13' + with: + r-version: {{ '${{ matrix.r-version }}' }} + use-public-rspm: true + - name: Setup R + if: matrix.platform == 'macos-10.13' run: | - options(width = 100) - pkgs <- installed.packages()[, "Package"] - sessioninfo::session_info(pkgs, include_base = TRUE) - shell: Rscript {0} + # rig is a system utility that allows for switching + # between pre-installed R version on the self-hosted runners + rig default {{ '${{ matrix.r-version }}' }} + rig system setup-user-lib + rig system add-pak + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + working-directory: 'arrow/r' + extra-packages: | + any::rcmdcheck + any::sys + any::readr - name: Check env: _R_CHECK_CRAN_INCOMING_: false + NOT_CRAN: true ARROW_USE_PKG_CONFIG: false + ARROW_R_DEV: true run: arrow/ci/scripts/r_test.sh arrow - name: Dump install logs run: cat arrow/r/check/arrow.Rcheck/00install.out diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index 6e46b3ff439c1..d1a95f64caa51 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -20,13 +20,8 @@ # This allows us to set a custom version via param: # crossbow submit --param custom_version=8.5.3 r-binary-packages # if the param is unset defaults to the usual Ymd naming scheme -{% set package_version = custom_version|default("\\2.\'\"$(date +%Y%m%d)\"\'") %} -# We need this as boolean and string -{% set is_upstream_b = arrow.github_repo == 'apache/arrow' %} -# use filter to cast to string and convert to lowercase to match yaml boolean -{% set is_fork = (not is_upstream_b)|lower %} -{% set is_upstream = is_upstream_b|lower %} - +{% set package_version = custom_version|replace("Unset", "\\2.\'\"$(date +%Y%m%d)\"\'") %} +{% set is_fork = macros.is_fork %} {{ macros.github_header() }} diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 39de1e8536418..40133f287a457 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -205,13 +205,18 @@ tasks: ci: github template: docker-tests/github.linux.yml params: + # ARROW-17395: Enable this again when grpc is updated. 
+ # -e ARROW_CONAN_WITH_FLIGHT_RPC=True flags: >- -e ARROW_CONAN_PARQUET=True -e ARROW_CONAN_WITH_BROTLI=True + -e ARROW_CONAN_WITH_BZ2=True -e ARROW_CONAN_WITH_GLOG=True -e ARROW_CONAN_WITH_JEMALLOC=True + -e ARROW_CONAN_WITH_JSON=True -e ARROW_CONAN_WITH_LZ4=True -e ARROW_CONAN_WITH_SNAPPY=True + -e ARROW_CONAN_WITH_ZSTD=True image: conan ########################### Python Minimal ############################ @@ -580,6 +585,7 @@ tasks: - .dsc - .orig.tar.gz artifacts: + - arrow-tools_{no_rc_version}-1_[a-z0-9]+.deb {% if architecture == "amd64" %} - apache-arrow-apt-source_{no_rc_version}-1.debian.tar.xz - apache-arrow-apt-source_{no_rc_version}-1.dsc @@ -598,67 +604,62 @@ tasks: - libarrow-dataset-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-dataset-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-dataset-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-dataset-glib900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-dataset-glib900_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-dataset900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-dataset900_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-dataset-glib1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-dataset-glib1000_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-dataset1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-dataset1000_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-glib900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-flight-glib900_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight-glib1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-flight-glib1000_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-sql-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-sql-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-flight-sql-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-sql-glib900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-flight-sql-glib900_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight-sql900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-flight-sql900_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-flight900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-flight900_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight-sql-glib1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-flight-sql-glib1000_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight-sql1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-flight-sql1000_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-flight1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-flight1000_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-glib900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-glib900_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-python-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-python-flight900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-python-flight900_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-python900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-python900_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow900_{no_rc_version}-1_[a-z0-9]+.deb + - 
libarrow-glib1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-glib1000_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow1000_{no_rc_version}-1_[a-z0-9]+.deb - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb - libgandiva-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libgandiva-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libgandiva-glib900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libgandiva-glib900_{no_rc_version}-1_[a-z0-9]+.deb - - libgandiva900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libgandiva900_{no_rc_version}-1_[a-z0-9]+.deb + - libgandiva-glib1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libgandiva-glib1000_{no_rc_version}-1_[a-z0-9]+.deb + - libgandiva1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libgandiva1000_{no_rc_version}-1_[a-z0-9]+.deb - libparquet-dev_{no_rc_version}-1_[a-z0-9]+.deb - libparquet-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libparquet-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libparquet-glib900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libparquet-glib900_{no_rc_version}-1_[a-z0-9]+.deb - - libparquet900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libparquet900_{no_rc_version}-1_[a-z0-9]+.deb + - libparquet-glib1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libparquet-glib1000_{no_rc_version}-1_[a-z0-9]+.deb + - libparquet1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libparquet1000_{no_rc_version}-1_[a-z0-9]+.deb + - parquet-tools_{no_rc_version}-1_[a-z0-9]+.deb {% if architecture == "amd64" %} - gir1.2-arrow-cuda-1.0_{no_rc_version}-1_[a-z0-9]+.deb - gir1.2-plasma-1.0_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-cuda-dev_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-cuda-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-cuda-glib900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-cuda-glib900_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-cuda900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-cuda900_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-cuda-glib1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-cuda-glib1000_{no_rc_version}-1_[a-z0-9]+.deb + - libarrow-cuda1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libarrow-cuda1000_{no_rc_version}-1_[a-z0-9]+.deb - libplasma-dev_{no_rc_version}-1_[a-z0-9]+.deb - libplasma-glib-dev_{no_rc_version}-1_[a-z0-9]+.deb - libplasma-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - - libplasma-glib900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libplasma-glib900_{no_rc_version}-1_[a-z0-9]+.deb - - libplasma900-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libplasma900_{no_rc_version}-1_[a-z0-9]+.deb + - libplasma-glib1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libplasma-glib1000_{no_rc_version}-1_[a-z0-9]+.deb + - libplasma1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb + - libplasma1000_{no_rc_version}-1_[a-z0-9]+.deb - plasma-store-server-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - plasma-store-server_{no_rc_version}-1_[a-z0-9]+.deb {% endif %} @@ -713,7 +714,7 @@ tasks: - arrow-debugsource-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm {% endif %} - arrow-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% if not is_rhel7_based %} + {% if not is_rhel7_based and architecture == "amd64" %} - arrow-flight-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-flight-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - arrow-flight-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm @@ -739,18 +740,12 @@ tasks: - arrow[0-9]+-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm {% endif %} - 
arrow[0-9]+-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% if target != "amazon-linux-2" %} - - arrow-python-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% if not is_rhel7_based %} - - arrow-python-flight-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow[0-9]+-python-flight-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow[0-9]+-python-flight-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow[0-9]+-python-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% endif %} - - arrow[0-9]+-python-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% endif %} {% if architecture == "amd64" %} - arrow-{no_rc_version}-1.[a-z0-9]+.src.rpm + {% endif %} + - arrow-tools-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% if not is_rhel7_based %} + - arrow-tools-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm {% endif %} {% if not is_rhel7_based and architecture == "amd64" %} - gandiva-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm @@ -772,6 +767,10 @@ tasks: - parquet[0-9]+-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm {% endif %} - parquet[0-9]+-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + - parquet-tools-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% if not is_rhel7_based %} + - parquet-tools-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm + {% endif %} - plasma-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - plasma-glib-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - plasma-glib-doc-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm @@ -952,6 +951,8 @@ tasks: r-binary-packages: ci: github template: r/github.packages.yml + params: + custom_version: Unset artifacts: - r-lib__libarrow__bin__windows__arrow-[0-9\.]+\.zip - r-lib__libarrow__bin__centos-7__arrow-[0-9\.]+\.zip @@ -1023,7 +1024,7 @@ tasks: params: target: {{ target }} use_conda: True - github_runner: "macos-10.15" + github_runner: "macos-latest" {% endfor %} {% for target in ["cpp", @@ -1039,7 +1040,7 @@ tasks: template: verify-rc/github.macos.amd64.yml params: target: {{ target }} - github_runner: "macos-10.15" + github_runner: "macos-latest" {% endfor %} {% for target in ["cpp", @@ -1063,7 +1064,7 @@ tasks: github_runner: ["self-hosted", "macOS", "arm64"] {% endfor %} - {% for macos_version in ["10.15", "11"] %} + {% for macos_version in ["11", "12"] %} verify-rc-binaries-wheels-macos-{{ macos_version }}-amd64: ci: github template: verify-rc/github.macos.amd64.yml @@ -1111,7 +1112,8 @@ tasks: ############################## Docker tests ################################## -{% for image in ["conda-cpp", +{% for image in ["alpine-linux-cpp", + "conda-cpp", "debian-c-glib", "ubuntu-c-glib", "debian-ruby", @@ -1589,3 +1591,13 @@ tasks: type: minimal_build run: {{ kind }} {% endfor %} + +############################## Utility tasks ############################ + preview-docs: + ci: github + template: docs/github.linux.yml + params: + pr_number: Unset + artifacts: "build/docs.tar.gz" + flags: "-v $PWD/build/:/build/" + image: ubuntu-docs \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 13d7a4da4f88d..751a81fa5540a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -96,6 +96,7 @@ x-hierarchy: # descendant images if any. Archery checks that all node has a corresponding # service entry, so any new image/service must be listed here. 
- almalinux-verify-rc + - alpine-linux-cpp - centos-cpp-static - conda: - conda-cpp: @@ -121,7 +122,6 @@ x-hierarchy: - debian-go-cgo - debian-go-cgo-python - debian-java - - debian-java-jni - debian-js - fedora-cpp: - fedora-python @@ -162,6 +162,8 @@ x-hierarchy: volumes: almalinux-ccache: name: ${ARCH}-almalinux-ccache + alpine-linux-ccache: + name: ${ARCH}-alpine-linux-ccache conda-ccache: name: ${ARCH}-conda-ccache debian-ccache: @@ -192,6 +194,36 @@ services: # -e ARROW_TEST_LINKAGE=static \ # conda-cpp|debian-cpp|... + alpine-linux-cpp: + # Usage: + # docker-compose build alpine-linux-cpp + # docker-compose run --rm alpine-linux-cpp + # Parameters: + # ALPINE_LINUX: 3.16 + # ARCH: amd64, arm64v8, ... + image: ${REPO}:${ARCH}-alpine-linux-${ALPINE_LINUX}-cpp + build: + context: . + dockerfile: ci/docker/alpine-linux-${ALPINE_LINUX}-cpp.dockerfile + cache_from: + - ${REPO}:${ARCH}-alpine-linux-${ALPINE_LINUX}-cpp + args: + arch: ${ARCH} + shm_size: &shm-size 2G + ulimits: &ulimits + core: ${ULIMIT_CORE} + environment: + <<: *ccache + ARROW_ENABLE_TIMING_TESTS: # inherit + ARROW_MIMALLOC: "ON" + volumes: &alpine-linux-volumes + - .:/arrow:delegated + - ${DOCKER_VOLUME_PREFIX}alpine-linux-ccache:/ccache:delegated + command: >- + /bin/bash -c " + /arrow/ci/scripts/cpp_build.sh /arrow /build && + /arrow/ci/scripts/cpp_test.sh /arrow /build" + conda: # Base image for conda builds. # @@ -229,9 +261,8 @@ services: args: repo: ${REPO} arch: ${ARCH} - shm_size: &shm-size 2G - ulimits: &ulimits - core: ${ULIMIT_CORE} + shm_size: *shm-size + ulimits: *ulimits environment: <<: *ccache ARROW_BUILD_BENCHMARKS: "ON" @@ -455,9 +486,16 @@ services: llvm: ${LLVM} shm_size: *shm-size ulimits: *ulimits - environment: + environment: &cuda-environment <<: *ccache + ARROW_BUILD_STATIC: "OFF" ARROW_CUDA: "ON" + ARROW_GANDIVA: "OFF" + ARROW_GCS: "OFF" + ARROW_ORC: "OFF" + ARROW_S3: "OFF" + ARROW_SUBSTRAIT: "OFF" + ARROW_WITH_OPENTELEMETRY: "OFF" volumes: *ubuntu-volumes command: *cpp-command @@ -736,10 +774,10 @@ services: - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cuda-${CUDA}-python-3 args: base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cuda-${CUDA}-cpp + numba: ${NUMBA} shm_size: *shm-size environment: - <<: *ccache - ARROW_CUDA: "ON" + <<: [ *ccache, *cuda-environment ] volumes: *ubuntu-volumes command: &python-command > /bin/bash -c " @@ -960,10 +998,14 @@ services: <<: *ccache volumes: - .:/arrow:delegated + - ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated - ${DOCKER_VOLUME_PREFIX}python-wheel-manylinux2014-ccache:/ccache:delegated command: - ["pip install -e /arrow/dev/archery && - /arrow/ci/scripts/java_jni_manylinux_build.sh /arrow /build /arrow/java-dist"] + ["pip install -e /arrow/dev/archery && \ + /arrow/ci/scripts/java_jni_manylinux_build.sh /arrow /build /arrow/java-dist && \ + source /opt/rh/rh-maven35/enable && \ + /arrow/ci/scripts/java_build.sh /arrow /build /arrow/java-dist && \ + /arrow/ci/scripts/java_test.sh /arrow /build /arrow/java-dist"] ############################## Integration ################################# @@ -1391,6 +1433,7 @@ services: args: arch: ${ARCH} go: ${GO} + staticcheck: ${STATICCHECK} shm_size: *shm-size volumes: *debian-volumes command: &go-command > @@ -1501,34 +1544,6 @@ services: /arrow/ci/scripts/java_build.sh /arrow /build && /arrow/ci/scripts/java_test.sh /arrow /build" - debian-java-jni: - # Includes plasma test, jni for gandiva and orc, and C data interface. 
- # Usage: - # docker-compose build debian-java - # docker-compose build debian-java-jni - # docker-compose run debian-java-jni - image: ${REPO}:${ARCH}-debian-9-java-${JDK}-maven-${MAVEN}-jni - build: - context: . - dockerfile: ci/docker/linux-apt-jni.dockerfile - cache_from: - - ${REPO}:${ARCH}-debian-9-java-${JDK}-maven-${MAVEN}-jni - args: - llvm: ${LLVM} - shm_size: *shm-size - environment: - <<: *ccache - volumes: - - .:/arrow:delegated - - ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated - - ${DOCKER_VOLUME_PREFIX}debian-ccache:/ccache:delegated - command: - /bin/bash -c " - /arrow/ci/scripts/cpp_build.sh /arrow /build && - /arrow/ci/scripts/java_jni_build.sh /arrow /build /tmp/java_dist && - /arrow/ci/scripts/java_build.sh /arrow /build /tmp/java_dist && - /arrow/ci/scripts/java_test.sh /arrow /build /tmp/java_dist" - oracle-java: # Usage: # docker-compose build oracle-java @@ -1599,6 +1614,7 @@ services: base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-python-3 environment: <<: *ccache + ARROW_JAVA_SKIP_GIT_PLUGIN: ARROW_CUDA: "ON" BUILD_DOCS_C_GLIB: "ON" BUILD_DOCS_CPP: "ON" diff --git a/docs/source/_static/versions.json b/docs/source/_static/versions.json index 1738afbf23967..013df079ec224 100644 --- a/docs/source/_static/versions.json +++ b/docs/source/_static/versions.json @@ -1,12 +1,16 @@ [ { - "name": "9.0 (dev)", + "name": "10.0 (dev)", "version": "dev/" }, { - "name": "8.0 (stable)", + "name": "9.0 (stable)", "version": "" }, + { + "name": "8.0", + "version": "8.0/" + }, { "name": "7.0", "version": "7.0/" diff --git a/docs/source/cpp/api/compute.rst b/docs/source/cpp/api/compute.rst index d64362a408202..288e280d0cf71 100644 --- a/docs/source/cpp/api/compute.rst +++ b/docs/source/cpp/api/compute.rst @@ -70,6 +70,11 @@ Streaming Execution Operators :members: :undoc-members: +.. doxygengroup:: execnode-components + :content-only: + :members: + :undoc-members: + Execution Plan Expressions -------------------------- diff --git a/docs/source/cpp/build_system.rst b/docs/source/cpp/build_system.rst index 95cfe4ce3fa08..d4a0c82cc61a0 100644 --- a/docs/source/cpp/build_system.rst +++ b/docs/source/cpp/build_system.rst @@ -23,7 +23,8 @@ Using Arrow C++ in your own project =================================== This section assumes you already have the Arrow C++ libraries on your -system, either after installing them using a package manager or after +system, either after `installing them using a package manager +`_ or after :ref:`building them yourself `. The recommended way to integrate the Arrow C++ libraries in your own diff --git a/docs/source/cpp/env_vars.rst b/docs/source/cpp/env_vars.rst index 235bc7e672e58..e4f8f9046a17a 100644 --- a/docs/source/cpp/env_vars.rst +++ b/docs/source/cpp/env_vars.rst @@ -92,7 +92,7 @@ that changing their value later will have an effect. Supported values are: - ``NONE`` disables any runtime-selected SIMD optimization; - - ``SSE4.2`` enables any SSE2-based optimizations until SSE4.2 (included); + - ``SSE4_2`` enables any SSE2-based optimizations until SSE4.2 (included); - ``AVX`` enables any AVX-based optimizations and earlier; - ``AVX2`` enables any AVX2-based optimizations and earlier; - ``AVX512`` enables any AVX512-based optimizations and earlier. diff --git a/docs/source/cpp/examples/dataset_skyhook_scan_example.rst b/docs/source/cpp/examples/dataset_skyhook_scan_example.rst new file mode 100644 index 0000000000000..75a3954cf3dbd --- /dev/null +++ b/docs/source/cpp/examples/dataset_skyhook_scan_example.rst @@ -0,0 +1,93 @@ +.. 
Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp + +===================== +Arrow Skyhook example +===================== + +The file ``cpp/examples/arrow/dataset_skyhook_scan_example.cc`` +located inside the source tree contains an example of using Skyhook to +offload filters and projections to a Ceph cluster. + +Instructions +============ + +.. note:: + The instructions below are for Ubuntu 20.04 or above. + +1. Install Ceph and Skyhook dependencies. + + .. code-block:: bash + + apt update + apt install -y cmake \ + libradospp-dev \ + rados-objclass-dev \ + ceph \ + ceph-common \ + ceph-osd \ + ceph-mon \ + ceph-mgr \ + ceph-mds \ + rbd-mirror \ + ceph-fuse \ + rapidjson-dev \ + libboost-all-dev \ + python3-pip + +2. Build and install Skyhook. + + .. code-block:: bash + + git clone https://github.com/apache/arrow + cd arrow/ + mkdir -p cpp/release + cd cpp/release + cmake -DARROW_SKYHOOK=ON \ + -DARROW_PARQUET=ON \ + -DARROW_WITH_SNAPPY=ON \ + -DARROW_BUILD_EXAMPLES=ON \ + -DARROW_DATASET=ON \ + -DARROW_CSV=ON \ + -DARROW_WITH_LZ4=ON \ + .. + + make -j install + cp release/libcls_skyhook.so /usr/lib/x86_64-linux-gnu/rados-classes/ + +3. Deploy a Ceph cluster with a single in-memory OSD using `this `_ script. + + .. code-block:: bash + + ./micro-osd.sh /tmp/skyhook + +4. Generate the example dataset. + + .. code-block:: bash + + pip install pandas pyarrow + python3 ../../ci/scripts/generate_dataset.py + cp -r nyc /mnt/cephfs/ + +5. Execute the example. + + .. code-block:: bash + + LD_LIBRARY_PATH=/usr/local/lib release/dataset-skyhook-scan-example file:///mnt/cephfs/nyc diff --git a/docs/source/cpp/streaming_execution.rst b/docs/source/cpp/streaming_execution.rst index e49225637df13..daa5f4be2f013 100644 --- a/docs/source/cpp/streaming_execution.rst +++ b/docs/source/cpp/streaming_execution.rst @@ -144,6 +144,17 @@ Join Relations join key is supported. * The ``post_join_filter`` property is not supported and will be ignored. +Aggregate Relations +^^^^^^^^^^^^^^^^^^^ + + * At most one grouping set is supported. + * Each grouping expression must be a direct reference. + * Each measure's arguments must be direct references. + * A measure may not have a filter. + * A measure may not have sorts. + * A measure's invocation must be AGGREGATION_INVOCATION_ALL. + * A measure's phase must be AGGREGATION_PHASE_INITIAL_TO_RESULT. + Expressions (general) ^^^^^^^^^^^^^^^^^^^^^ @@ -152,20 +163,128 @@ Expressions (general) grouping set. Acero typically expects these expressions to be direct references. Planners should extract the implicit projection into a formal project relation before delivering the plan to Acero. + * Older versions of Isthmus would omit optional arguments instead of including them + as unspecified enums.
Acero will not support these plans. Literals ^^^^^^^^ * A literal with non-default nullability will cause a plan to be rejected. +Types +^^^^^ + + * Acero does not have full support for non-nullable types and may allow input + to have nulls without rejecting it. + * The table below shows the mapping between Arrow types and Substrait type + classes that are currently supported. + +.. list-table:: Substrait / Arrow Type Mapping + :widths: 25 25 25 + :header-rows: 1 + + * - Substrait Type + - Arrow Type + - Caveat + * - boolean + - boolean + - + * - i8 + - int8 + - + * - i16 + - int16 + - + * - i32 + - int32 + - + * - i64 + - int64 + - + * - fp32 + - float32 + - + * - fp64 + - float64 + - + * - string + - string + - + * - binary + - binary + - + * - timestamp + - timestamp + - + * - timestamp_tz + - timestamp + - + * - date + - date32 + - + * - time + - time64 + - + * - interval_year + - + - Not currently supported + * - interval_day + - + - Not currently supported + * - uuid + - + - Not currently supported + * - FIXEDCHAR + - + - Not currently supported + * - VARCHAR + - + - Not currently supported + * - FIXEDBINARY + - fixed_size_binary + - + * - DECIMAL + - decimal128 + - + * - STRUCT + - struct + - Arrow struct fields will have no name (empty string) + * - NSTRUCT + - + - Not currently supported + * - LIST + - list + - + * - MAP + - map + - K must not be nullable + Functions ^^^^^^^^^ - * The only functions currently supported by Acero are: - - * add - * equal - * is_not_distinct_from + * Acero does not support the legacy ``args`` style of declaring arguments + * The following functions have caveats or are not supported at all. Note that + this is not a comprehensive list. Functions are being added to Substrait at + a rapid pace and new functions may be missing. + + * Acero does not support the SATURATE option for overflow + * Acero does not support kernels that take more than two arguments + for the functions ``and``, ``or``, ``xor`` + * Acero does not support temporal arithmetic + * Acero does not support the following standard functions: + + * ``is_not_distinct_from`` + * ``like`` + * ``substring`` + * ``starts_with`` + * ``ends_with`` + * ``contains`` + * ``count`` + * ``count_distinct`` + * ``approx_count_distinct`` * The functions above must be referenced using the URI ``https://github.com/apache/arrow/blob/master/format/substrait/extension_types.yaml`` diff --git a/docs/source/developers/bug_reports.rst b/docs/source/developers/bug_reports.rst index 2a3cb6f3900c3..b2247db515bef 100644 --- a/docs/source/developers/bug_reports.rst +++ b/docs/source/developers/bug_reports.rst @@ -18,82 +18,64 @@ .. _bug-reports: ******************************** -Report bugs and propose features +Bug reports and feature requests ******************************** -Using the software and sharing your experience is a very helpful contribution -itself. Those who actively develop Arrow need feedback from users on what -works and what doesn't. Alerting us to unexpected behavior and missing features, -even if you can't solve the problems yourself, help us understand and prioritize -work to improve the libraries. +Arrow relies upon user feedback to identify defects and improvement +opportunities. All users are encouraged to participate by creating bug reports +and feature requests or commenting on existing issues. Even if you cannot +contribute solutions to the issues yourself, your feedback helps us understand +problems and prioritize work to improve the libraries.
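+
+For bug reports in particular, the most useful thing you can include is a
+short, self-contained snippet that demonstrates the problem (there is more
+guidance on this below). As an illustration only, with made-up values rather
+than a real defect, the shape of a minimal Python report might look like:
+
+.. code-block:: python
+
+   import pyarrow as pa
+
+   # The smallest input that shows the behaviour being reported
+   arr = pa.array([1, 2, None])
+
+   # What was expected, and what actually happened
+   print(arr.null_count)  # expected: 1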
-We use `JIRA `_ -to manage our development "todo" list and to maintain changelogs for releases. -In addition, the project's `Confluence site `_ -has some useful higher-level views of the JIRA issues. +.. _apache-arrow-jira: -To create a JIRA issue, you'll need to have an account on the ASF JIRA, which -you can `sign yourself up for `_. -The JIRA server hosts bugs and issues for multiple Apache projects. The JIRA -project name for Arrow is "ARROW". +Apache Arrow Jira +================= -You don't need any special permissions on JIRA to be able to create issues. -Once you are more involved in the project and want to do more on JIRA, such as -assign yourself an issue, you will need "Contributor" permissions on the -Apache Arrow JIRA. To get this role, ask on the mailing list for a project -maintainer's help. +The Arrow project uses `Jira `_ +to track issues - both bug reports and feature requests. No account or +permissions are required to view or search Jira issues. The Jira server +hosts issue tracking for multiple Apache projects. The Jira project name for +Arrow is "ARROW". +.. _required-permissions: -.. _jira-tips: +Required permissions +++++++++++++++++++++ -Tips for using JIRA -=================== +Any registered Apache Software Foundation (ASF) Jira account may create or +assign Jira issues in the Apache Arrow project without additional permissions. +Individuals may +`create an ASF Jira account here `_. -Before you create a new issue, we recommend you first -`search `_ -among existing Arrow issues. +.. _creating-issues: -When reporting a new issue, follow these conventions to help make sure the -right people see it: +Creating issues +=============== -* Use the **Component** field to indicate the area of the project that your - issue pertains to (for example "Python" or "C++"). -* Also prefix the issue title with the component name in brackets, for example - ``[Python] issue name`` ; this helps when navigating lists of open issues, - and it also makes our changelogs more readable. Most prefixes are exactly the - same as the **Component** name, with the following exceptions: +Apache Arrow relies upon community contributions to address reported bugs and +feature requests. As with most software projects, contributor time and +resources are finite. The following guidelines aim to produce high-quality +bug reports and feature requests, enabling community contributors to respond +to more issues, faster: - * **Component:** Continuous Integration — **Summary prefix:** [CI] - * **Component:** Developer Tools — **Summary prefix:** [Dev] - * **Component:** Documentation — **Summary prefix:** [Docs] +.. _check-existing-issues: -* If you're reporting something that used to work in a previous version - but doesn't work in the current release, you can add the "Affects version" - field. For feature requests and other proposals, "Affects version" isn't - appropriate. +Check existing issues ++++++++++++++++++++++ -Project maintainers may later tweak formatting and labels to help improve their -visibility. They may add a "Fix version" to indicate that they're considering -it for inclusion in the next release, though adding that tag is not a -commitment that it will be done in the next release. - -.. _bug-report-tips: +Before you create a new issue, we recommend you first +`search `_ +for unresolved existing issues identifying the same problem or feature request. -Tips for successful bug reports -================================ +.. 
_describe-issue: -No one likes having bugs in their software, and in an ideal world, all bugs -would get fixed as soon as they were reported. However, time and attention are -finite, especially in an open-source project where most contributors are -participating in their spare time. All contributors in Apache projects are -volunteers and act as individuals, even if they are contributing to the project -as part of their job responsibilities. +Issue description ++++++++++++++++++ -In order for your bug to get prompt -attention, there are things you can do to make it easier for contributors to -reproduce and fix it. -**When you're reporting a bug, please help us understand the issue by providing, -to the best of your ability,** +A clear description of the problem or requested feature is the most important +element of any issue. An effective description helps developers understand +and efficiently engage on reported issues, and may include the following: * **Clear, minimal steps to reproduce the issue, with as few non-Arrow dependencies as possible.** If there's a problem on reading a file, try to @@ -103,13 +85,15 @@ to the best of your ability,** * Any relevant operating system, language, and library version information * If it isn't obvious, clearly state the expected behavior and what actually happened. +* Avoid overloading a single issue with multiple problems or feature requests. + Each issue should deal with a single bug or feature. If a developer can't get a failing unit test, they won't be able to know that the issue has been identified, and they won't know when it has been fixed. Try to anticipate the questions you might be asked by someone working to understand the issue and provide those supporting details up front. -Good reproducible examples or minimal bug reports can be found in next tabs: +Examples of good bug reports are found below: .. tab-set:: @@ -186,10 +170,104 @@ Good reproducible examples or minimal bug reports can be found in next tabs: #> 1 rows x 1 columns #> $x - -Other resources: +Other resources for producing useful bug reports: * `Python: Craft Minimal Bug Reports by Matthew Rocklin `_ * `R: Tidyverse: Make a reprex `_ * `R: Tidyverse's Reprex do's and don'ts `_ * `Mozilla's bug-reporting guidelines `_ + +.. _identify-component: + +Identify Arrow component +++++++++++++++++++++++++ + +Arrow is an expansive project supporting many languages and organized into a +number of components. Identifying the affected component(s) helps new issues +get attention from appropriate contributors. + +* Use the **Component** field to indicate the area of the project that your + issue pertains to (for example "Python" or "C++"). +* Also prefix the issue title with the component name in brackets, for example + ``[Python] issue summary`` ; this helps when navigating lists of open issues, + and it also makes our changelogs more readable. Most prefixes are exactly the + same as the **Component** name, with the following exceptions: + + * **Component:** Continuous Integration — **Summary prefix:** [CI] + * **Component:** Developer Tools — **Summary prefix:** [Dev] + * **Component:** Documentation — **Summary prefix:** [Docs] + +.. _affected-version: + +Identify affected version ++++++++++++++++++++++++++ + +If you're reporting something that used to work in a previous version +but doesn't work in the current release, you can add the **Affects version** +field to identify the earliest known version where the bug is observed. 
+For feature requests and other proposals, leave **Affects version** empty as +it is not applicable. + +.. _issue-lifecycle: + +Issue lifecycle +=============== + +Both bug reports and feature requests follow a defined lifecycle. The issue +**Status** field is used to document the current state of the issue, while the +**Resolution** field indicates the outcome of issues that have reached +terminal status. + + +.. _issue-status: + +Issue Status +++++++++++++ + +The Arrow project uses the following statuses in Jira to indicate what has - +and will be - done on an issue: + +* **Open** - This is the initial issue state, prior to a contributor assigning + the issue and starting progress. Issues in this state should be unassigned. +* **In progress** - At the time a contributor self-assigns an issue, the status + should be set to In progress by clicking the **Start progress** button. All + issues in this status should have an assignee - unassigned issues will be + set back to a status of Open. Issues remain "in progress" until resolved or + closed, including during review of pull requests. +* **Resolved** - This is a terminal status indicating action has been taken + on the issue, which is now considered completed. Issues in a resolved status + should have a resolution code set to **Fixed**. +* **Closed** - Another terminal status, Closed indicates the issue is complete, + but *without* action being taken. The following resolution codes apply to + issues in Closed status: + + * Won't Fix + * Duplicate + * Invalid + * Incomplete + * Cannot Reproduce + * Not a Problem + * Not a Bug + * Workaround + * Information Provided + * Works for Me + * Won't Do + * Abandoned + +* **Reopened** - When an issue has been closed or resolved, but additional + attention is needed, it may be reopened. + + +.. _issue-assignment: + +Issue assignment +++++++++++++++++ + +Assignment signals commitment to work on an issue, and contributors should +self-assign issues when that work starts. At the same time the issue is +assigned, the status field should be updated to **In Progress**. + +The Arrow project relies upon community contributors to resolve issues. We +recognize that priorities and plans may change, resulting in an issue assigned +to an individual who cannot attend to it. Assigned issues without updates in +the past 90 days will be unassigned and set to **Open** status. diff --git a/docs/source/developers/cpp/building.rst b/docs/source/developers/cpp/building.rst index 83c774c3dcca2..b988bd2eebe11 100644 --- a/docs/source/developers/cpp/building.rst +++ b/docs/source/developers/cpp/building.rst @@ -323,6 +323,7 @@ boolean flags to ``cmake``. filesystems * ``-DARROW_FLIGHT=ON``: Arrow Flight RPC system, which depends at least on gRPC +* ``-DARROW_FLIGHT_SQL=ON``: Arrow Flight SQL * ``-DARROW_GANDIVA=ON``: Gandiva expression compiler, depends on LLVM, Protocol Buffers, and re2 * ``-DARROW_GANDIVA_JAVA=ON``: Gandiva JNI bindings for Java diff --git a/docs/source/developers/cpp/windows.rst b/docs/source/developers/cpp/windows.rst index 91562a2c8cbb1..38907aeb7fe08 100644 --- a/docs/source/developers/cpp/windows.rst +++ b/docs/source/developers/cpp/windows.rst @@ -52,7 +52,7 @@ Using conda-forge for build dependencies ======================================== `Miniconda `_ is a minimal Python distribution -including the `conda `_ package manager. Some memers of the +including the `conda `_ package manager. 
Some members of the Apache Arrow community participate in the maintenance of `conda-forge `_, a community-maintained cross-platform package repository for conda. diff --git a/docs/source/developers/guide/communication.rst b/docs/source/developers/guide/communication.rst index 3677598833387..3ba0c3a2c34e6 100644 --- a/docs/source/developers/guide/communication.rst +++ b/docs/source/developers/guide/communication.rst @@ -78,7 +78,7 @@ might think is not a good idea. .. seealso:: - :ref:`create_jira` - - :ref:`jira-tips` and :ref:`bug-report-tips` + - :ref:`creating-issues` and :ref:`bug-report-tips` - If you want to **solve an issue that is already in JIRA**, you should connect with other contributors in the issue comments. diff --git a/docs/source/developers/guide/resources.rst b/docs/source/developers/guide/resources.rst index 43462591fd2cc..b53d104860d80 100644 --- a/docs/source/developers/guide/resources.rst +++ b/docs/source/developers/guide/resources.rst @@ -31,9 +31,10 @@ Additional information and resources On this page we have listed resources that may be relevant or useful for contributors who want to learn more about different parts of Apache Arrow. -.. Annotation Glossary -.. ------------------- -.. ARROW-15130 +Glossary +-------- +List of common terms in Apache Arrow project with a short description can +be found in :doc:`the glossary <../../format/Glossary>`. Additional information ---------------------- diff --git a/docs/source/developers/guide/step_by_step/finding_issues.rst b/docs/source/developers/guide/step_by_step/finding_issues.rst index 3363c69519b3f..537c345319a1d 100644 --- a/docs/source/developers/guide/step_by_step/finding_issues.rst +++ b/docs/source/developers/guide/step_by_step/finding_issues.rst @@ -80,16 +80,14 @@ If you are already in JIRA dashboard click the red ``create`` button in the top to do the same. You are ready to create the issue! Add a title and a description following -the :ref:`tips for using JIRA ` and you are ready to go! +the guidance in :ref:`creating issues ` and you are ready to go! .. seealso:: - :ref:`Tips for using JIRA ` + :ref:`Creating issues ` -You don’t need any special permissions on JIRA to be able to create issues. -Once you are more involved in the project and want to do more on JIRA, for -example assigning yourself an issue, you will need **“Contributor” permissions**. -To get this role, ask on the :ref:`mailing_list` or in the comment of the JIRA -issue you created. +You don’t need any special permissions on JIRA to be able to create +or self-assign issues. +To get this role, ask on the :ref:`dev mailing list `. When the ticket is created you can start a discussion about it in the JIRA comments section. diff --git a/docs/source/developers/java/building.rst b/docs/source/developers/java/building.rst index 2824649253f11..add2b11b27807 100644 --- a/docs/source/developers/java/building.rst +++ b/docs/source/developers/java/building.rst @@ -220,3 +220,160 @@ Common Errors .. _Archery: https://github.com/apache/arrow/blob/master/dev/archery/README.md .. _Dependency Resolution: https://arrow.apache.org/docs/developers/cpp/building.html#individual-dependency-resolution .. _C++ shared libraries: https://arrow.apache.org/docs/cpp/build_system.html + + +Installing Nightly Packages +=========================== + +.. warning:: + These packages are not official releases. Use them at your own risk. + +Arrow nightly builds are posted on the mailing list at `builds@arrow.apache.org`_. +The artifacts are uploaded to GitHub. 
For example, for 2022/07/30, they can be found at `Github Nightly`_. + + +Installing from Apache Nightlies +-------------------------------- +1. Look up the nightly version number for the Arrow libraries used. + + For example, for ``arrow-memory``, visit https://nightlies.apache.org/arrow/java/org/apache/arrow/arrow-memory/ and see what versions are available (e.g. 9.0.0.dev501). +2. Add Apache Nightlies Repository to the Maven/Gradle project. + +.. code-block:: xml + + + 9.0.0.dev501 + + ... + + + arrow-apache-nightlies + https://nightlies.apache.org/arrow/java + + + ... + + + org.apache.arrow + arrow-vector + ${arrow.version} + + + ... + +Installing Manually +------------------- + +1. Decide nightly packages repository to use, for example: https://github.com/ursacomputing/crossbow/releases/tag/nightly-packaging-2022-07-30-0-github-java-jars +2. Add packages to your pom.xml, for example: flight-core (it depends on: arrow-format, arrow-vector, arrow-memeory-core and arrow-memory-netty). + +.. code-block:: xml + + + 8 + 8 + 9.0.0.dev501 + + + + + org.apache.arrow + flight-core + ${arrow.version} + + + +3. Download the necessary pom and jar files to a temporary directory: + +.. code-block:: shell + + $ mkdir nightly-packaging-2022-07-30-0-github-java-jars + $ cd nightly-packaging-2022-07-30-0-github-java-jars + $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-java-root-9.0.0.dev501.pom + $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-format-9.0.0.dev501.pom + $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-format-9.0.0.dev501.jar + $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-vector-9.0.0.dev501.pom + $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-vector-9.0.0.dev501.jar + $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-memory-9.0.0.dev501.pom + $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-memory-core-9.0.0.dev501.pom + $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-memory-netty-9.0.0.dev501.pom + $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-memory-core-9.0.0.dev501.jar + $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-memory-netty-9.0.0.dev501.jar + $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/arrow-flight-9.0.0.dev501.pom + $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/flight-core-9.0.0.dev501.pom + $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-packaging-2022-07-30-0-github-java-jars/flight-core-9.0.0.dev501.jar + $ tree + . 
+ ├── arrow-flight-9.0.0.dev501.pom + ├── arrow-format-9.0.0.dev501.jar + ├── arrow-format-9.0.0.dev501.pom + ├── arrow-java-root-9.0.0.dev501.pom + ├── arrow-memory-9.0.0.dev501.pom + ├── arrow-memory-core-9.0.0.dev501.jar + ├── arrow-memory-core-9.0.0.dev501.pom + ├── arrow-memory-netty-9.0.0.dev501.jar + ├── arrow-memory-netty-9.0.0.dev501.pom + ├── arrow-vector-9.0.0.dev501.jar + ├── arrow-vector-9.0.0.dev501.pom + ├── flight-core-9.0.0.dev501.jar + └── flight-core-9.0.0.dev501.pom + +4. Install the artifacts to the local Maven repository with ``mvn install:install-file``: + +.. code-block:: shell + + $ mvn install:install-file -Dfile="$(pwd)/arrow-java-root-9.0.0.dev501.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-java-root -Dversion=9.0.0.dev501 -Dpackaging=pom + $ mvn install:install-file -Dfile="$(pwd)/arrow-format-9.0.0.dev501.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-format -Dversion=9.0.0.dev501 -Dpackaging=pom + $ mvn install:install-file -Dfile="$(pwd)/arrow-format-9.0.0.dev501.jar" -DgroupId=org.apache.arrow -DartifactId=arrow-format -Dversion=9.0.0.dev501 -Dpackaging=jar + $ mvn install:install-file -Dfile="$(pwd)/arrow-vector-9.0.0.dev501.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-vector -Dversion=9.0.0.dev501 -Dpackaging=pom + $ mvn install:install-file -Dfile="$(pwd)/arrow-vector-9.0.0.dev501.jar" -DgroupId=org.apache.arrow -DartifactId=arrow-vector -Dversion=9.0.0.dev501 -Dpackaging=jar + $ mvn install:install-file -Dfile="$(pwd)/arrow-memory-9.0.0.dev501.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-memory -Dversion=9.0.0.dev501 -Dpackaging=pom + $ mvn install:install-file -Dfile="$(pwd)/arrow-memory-core-9.0.0.dev501.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-memory-core -Dversion=9.0.0.dev501 -Dpackaging=pom + $ mvn install:install-file -Dfile="$(pwd)/arrow-memory-netty-9.0.0.dev501.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-memory-netty -Dversion=9.0.0.dev501 -Dpackaging=pom + $ mvn install:install-file -Dfile="$(pwd)/arrow-memory-core-9.0.0.dev501.jar" -DgroupId=org.apache.arrow -DartifactId=arrow-memory-core -Dversion=9.0.0.dev501 -Dpackaging=jar + $ mvn install:install-file -Dfile="$(pwd)/arrow-memory-netty-9.0.0.dev501.jar" -DgroupId=org.apache.arrow -DartifactId=arrow-memory-netty -Dversion=9.0.0.dev501 -Dpackaging=jar + $ mvn install:install-file -Dfile="$(pwd)/arrow-flight-9.0.0.dev501.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-flight -Dversion=9.0.0.dev501 -Dpackaging=pom + $ mvn install:install-file -Dfile="$(pwd)/flight-core-9.0.0.dev501.pom" -DgroupId=org.apache.arrow -DartifactId=flight-core -Dversion=9.0.0.dev501 -Dpackaging=pom + $ mvn install:install-file -Dfile="$(pwd)/flight-core-9.0.0.dev501.jar" -DgroupId=org.apache.arrow -DartifactId=flight-core -Dversion=9.0.0.dev501 -Dpackaging=jar + +5. Validate that the packages were installed: + +.. code-block:: shell + + $ tree ~/.m2/repository/org/apache/arrow + . 
+ ├── arrow-flight + │   ├── 9.0.0.dev501 + │   │   └── arrow-flight-9.0.0.dev501.pom + ├── arrow-format + │   ├── 9.0.0.dev501 + │   │   ├── arrow-format-9.0.0.dev501.jar + │   │   └── arrow-format-9.0.0.dev501.pom + ├── arrow-java-root + │   ├── 9.0.0.dev501 + │   │   └── arrow-java-root-9.0.0.dev501.pom + ├── arrow-memory + │   ├── 9.0.0.dev501 + │   │   └── arrow-memory-9.0.0.dev501.pom + ├── arrow-memory-core + │   ├── 9.0.0.dev501 + │   │   ├── arrow-memory-core-9.0.0.dev501.jar + │   │   └── arrow-memory-core-9.0.0.dev501.pom + ├── arrow-memory-netty + │   ├── 9.0.0.dev501 + │   │   ├── arrow-memory-netty-9.0.0.dev501.jar + │   │   └── arrow-memory-netty-9.0.0.dev501.pom + ├── arrow-vector + │   ├── 9.0.0.dev501 + │   │   ├── _remote.repositories + │   │   ├── arrow-vector-9.0.0.dev501.jar + │   │   └── arrow-vector-9.0.0.dev501.pom + └── flight-core + ├── 9.0.0.dev501 + │   ├── flight-core-9.0.0.dev501.jar + │   └── flight-core-9.0.0.dev501.pom + +6. Compile your project like usual with ``mvn clean install``. + +.. _builds@arrow.apache.org: https://lists.apache.org/list.html?builds@arrow.apache.org +.. _Github Nightly: https://github.com/ursacomputing/crossbow/releases/tag/nightly-packaging-2022-07-30-0-github-java-jars diff --git a/docs/source/developers/python.rst b/docs/source/developers/python.rst index 0cce2f83f5925..c30efd2358f0e 100644 --- a/docs/source/developers/python.rst +++ b/docs/source/developers/python.rst @@ -131,6 +131,30 @@ for ``.py`` files or for ``.pyx`` and ``.pxi`` files. In this case you will also need to install the `pytest-cython `_ plugin. +Testing PyArrow C++ +------------------- + +Most of the tests for PyArrow are part of the ``pytest``-based test suite mentioned above, +but a few low-level tests are written directly in C++ for historical reasons. +Those tests can be run using ``ctest``, but you first will need to build Arrow C++ +with ``-DARROW_BUILD_TESTS=ON``. + +.. note:: + + Currently, building the PyArrow C++ unit tests does not work with the + googletest package from conda-forge. If you are in this situation, please + add ``-DGTest_SOURCE=BUNDLED`` to the CMake flags + when building Arrow C++. + +After Arrow C++ and PyArrow are built, you can navigate to the ``python/build/dist`` +folder and run ``ctest``: + +.. code-block:: + + $ pushd arrow/python/build/dist + $ ctest + $ popd + Benchmarking ------------ @@ -388,9 +412,16 @@ Similarly, if you built with ``PARQUET_REQUIRE_ENCRYPTION`` (in C++), you need to set the corresponding ``PYARROW_WITH_PARQUET_ENCRYPTION`` environment variable to 1. -To set the number of threads used to compile PyArrow's C++/Cython components, +To set the number of threads used to compile PyArrow's C++/Cython components, set the ``PYARROW_PARALLEL`` environment variable. +.. note:: + + If you used a different directory name for building Arrow C++ (by default it is + named "build"), then you should also set the environment variable + ``ARROW_BUILD_DIR='name_of_build_dir'``. This way + PyArrow can find the Arrow C++ built files. + If you wish to delete stale PyArrow build artifacts before rebuilding, navigate to the ``arrow/python`` folder and run ``git clean -Xfd .``. @@ -604,3 +635,30 @@ Caveats ------- The Plasma component is not supported on Windows. + +Installing Nightly Packages +=========================== + +.. warning:: + These packages are not official releases. Use them at your own risk. + +PyArrow has nightly wheels and Conda packages for testing purposes. 
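+
+Nightly builds identify themselves with a development version string, for
+example ``9.0.0.dev501``. If you are not sure whether an environment has
+picked up a nightly or a release build, a quick check (the version shown
+here is only an example) is:
+
+.. code-block:: python
+
+   import pyarrow as pa
+
+   # A nightly build reports something like "9.0.0.dev501",
+   # while a release build reports e.g. "9.0.0".
+   print(pa.__version__)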
+ +These may be suitable for downstream libraries in their continuous integration +setup to maintain compatibility with the upcoming PyArrow features, +deprecations and/or feature removals. + +Install the development version of PyArrow from `arrow-nightlies +`_ conda channel: + +.. code-block:: bash + + conda install -c arrow-nightlies pyarrow + +Install the development version from an `alternative PyPI +`_ index: + +.. code-block:: bash + + pip install --extra-index-url https://pypi.fury.io/arrow-nightlies/ \ + --prefer-binary --pre pyarrow diff --git a/docs/source/format/Integration.rst b/docs/source/format/Integration.rst index 8b91cdaf324f2..f625f57b94c91 100644 --- a/docs/source/format/Integration.rst +++ b/docs/source/format/Integration.rst @@ -455,14 +455,16 @@ Gold File Integration Tests ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Pre-generated json and arrow IPC files (both file and stream format) exist -in the `arrow-testing ` repository +in the `arrow-testing `__ repository in the ``data/arrow-ipc-stream/integration`` directory. These serve as *gold* files that are assumed to be correct for use in testing. They are referenced by ``runner.py`` in the code for the :ref:`Archery ` utility. Below are the test cases which are covered by them: * Backwards Compatibility + - The following cases are tested using the 0.14.1 format: + + datetime + decimals + dictionaries @@ -472,10 +474,15 @@ utility. Below are the test cases which are covered by them: + primitives + primitive with no batches + primitive with zero length batches + - The following is tested for 0.17.1 format: + + unions + * Endianness + - The following cases are tested with both Little Endian and Big Endian versions for auto conversion + + custom metadata + datetime + decimals @@ -497,7 +504,10 @@ utility. Below are the test cases which are covered by them: + primitive batches with zero length + recursive nested types + union types + * Compression tests + - LZ4 - ZSTD + * Batches with Shared Dictionaries diff --git a/docs/source/java/cdata.rst b/docs/source/java/cdata.rst index 7e5c2df1c5e47..44e4f230ba65d 100644 --- a/docs/source/java/cdata.rst +++ b/docs/source/java/cdata.rst @@ -33,56 +33,57 @@ Python communication using the C Data Interface. Java to C++ ----------- -Example: Share an Int64 array from C++ to Java: - -**C++ Side** - See :doc:`../developers/cpp/building` to build the Arrow C++ libraries: .. code-block:: shell - $ git clone https://github.com/apache/arrow.git - $ cd arrow/cpp - $ mkdir build # from inside the `cpp` subdirectory - $ cd build - $ cmake .. --preset ninja-debug-minimal - $ cmake --build . - $ tree debug/ - debug/ - ├── libarrow.800.0.0.dylib - ├── libarrow.800.dylib -> libarrow.800.0.0.dylib - └── libarrow.dylib -> libarrow.800.dylib + $ git clone https://github.com/apache/arrow.git + $ cd arrow/cpp + $ mkdir build # from inside the `cpp` subdirectory + $ cd build + $ cmake .. --preset ninja-debug-minimal + $ cmake --build . + $ tree debug/ + debug/ + ├── libarrow.800.0.0.dylib + ├── libarrow.800.dylib -> libarrow.800.0.0.dylib + └── libarrow.dylib -> libarrow.800.dylib + +Share an Int64 array from C++ to Java +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**C++ Side** Implement a function in CDataCppBridge.h that exports an array via the C Data Interface: .. 
code-block:: cpp - #include - #include - #include - - void FillInt64Array(const uintptr_t c_schema_ptr, const uintptr_t c_array_ptr) { - arrow::Int64Builder builder; - builder.Append(1); - builder.Append(2); - builder.Append(3); - builder.AppendNull(); - builder.Append(5); - builder.Append(6); - builder.Append(7); - builder.Append(8); - builder.Append(9); - builder.Append(10); - std::shared_ptr array = *builder.Finish(); - - struct ArrowSchema* c_schema = reinterpret_cast(c_schema_ptr); - auto c_schema_status = arrow::ExportType(*array->type(), c_schema); - if (!c_schema_status.ok()) c_schema_status.Abort(); - - struct ArrowArray* c_array = reinterpret_cast(c_array_ptr); - auto c_array_status = arrow::ExportArray(*array, c_array); - if (!c_array_status.ok()) c_array_status.Abort(); - } + #include + #include + #include + + void FillInt64Array(const uintptr_t c_schema_ptr, const uintptr_t c_array_ptr) { + arrow::Int64Builder builder; + builder.Append(1); + builder.Append(2); + builder.Append(3); + builder.AppendNull(); + builder.Append(5); + builder.Append(6); + builder.Append(7); + builder.Append(8); + builder.Append(9); + builder.Append(10); + std::shared_ptr array = *builder.Finish(); + + struct ArrowSchema* c_schema = reinterpret_cast(c_schema_ptr); + auto c_schema_status = arrow::ExportType(*array->type(), c_schema); + if (!c_schema_status.ok()) c_schema_status.Abort(); + + struct ArrowArray* c_array = reinterpret_cast(c_array_ptr); + auto c_array_status = arrow::ExportArray(*array, c_array); + if (!c_array_status.ok()) c_array_status.Abort(); + } **Java Side** @@ -91,98 +92,98 @@ without writing JNI bindings ourselves. .. code-block:: xml - - - 4.0.0 - - org.example - java-cdata-example - 1.0-SNAPSHOT - - - 8 - 8 - 8.0.0 - - - - org.bytedeco - javacpp - 1.5.7 - - - org.apache.arrow - arrow-c-data - ${arrow.version} - - - org.apache.arrow - arrow-vector - ${arrow.version} - - - org.apache.arrow - arrow-memory-core - ${arrow.version} - - - org.apache.arrow - arrow-memory-netty - ${arrow.version} - - - org.apache.arrow - arrow-format - ${arrow.version} - - - + + + 4.0.0 + + org.example + java-cdata-example + 1.0-SNAPSHOT + + + 8 + 8 + 9.0.0 + + + + org.bytedeco + javacpp + 1.5.7 + + + org.apache.arrow + arrow-c-data + ${arrow.version} + + + org.apache.arrow + arrow-vector + ${arrow.version} + + + org.apache.arrow + arrow-memory-core + ${arrow.version} + + + org.apache.arrow + arrow-memory-netty + ${arrow.version} + + + org.apache.arrow + arrow-format + ${arrow.version} + + + .. 
code-block:: java - import org.bytedeco.javacpp.annotation.Platform; - import org.bytedeco.javacpp.annotation.Properties; - import org.bytedeco.javacpp.tools.InfoMap; - import org.bytedeco.javacpp.tools.InfoMapper; - - @Properties( - target = "CDataJavaToCppExample", - value = @Platform( - include = { - "CDataCppBridge.h" - }, - compiler = {"cpp11"}, - linkpath = {"/arrow/cpp/build/debug/"}, - link = {"arrow"} - ) - ) - public class CDataJavaConfig implements InfoMapper { - - @Override - public void map(InfoMap infoMap) { - } - } + import org.bytedeco.javacpp.annotation.Platform; + import org.bytedeco.javacpp.annotation.Properties; + import org.bytedeco.javacpp.tools.InfoMap; + import org.bytedeco.javacpp.tools.InfoMapper; + + @Properties( + target = "CDataJavaToCppExample", + value = @Platform( + include = { + "CDataCppBridge.h" + }, + compiler = {"cpp11"}, + linkpath = {"/arrow/cpp/build/debug/"}, + link = {"arrow"} + ) + ) + public class CDataJavaConfig implements InfoMapper { + + @Override + public void map(InfoMap infoMap) { + } + } .. code-block:: shell - # Compile our Java code - $ javac -cp javacpp-1.5.7.jar CDataJavaConfig.java + # Compile our Java code + $ javac -cp javacpp-1.5.7.jar CDataJavaConfig.java - # Generate CDataInterfaceLibrary - $ java -jar javacpp-1.5.7.jar CDataJavaConfig.java + # Generate CDataInterfaceLibrary + $ java -jar javacpp-1.5.7.jar CDataJavaConfig.java - # Generate libjniCDataInterfaceLibrary.dylib - $ java -jar javacpp-1.5.7.jar CDataJavaToCppExample.java + # Generate libjniCDataInterfaceLibrary.dylib + $ java -jar javacpp-1.5.7.jar CDataJavaToCppExample.java - # Validate libjniCDataInterfaceLibrary.dylib created - $ otool -L macosx-x86_64/libjniCDataJavaToCppExample.dylib - macosx-x86_64/libjniCDataJavaToCppExample.dylib: - libjniCDataJavaToCppExample.dylib (compatibility version 0.0.0, current version 0.0.0) - @rpath/libarrow.800.dylib (compatibility version 800.0.0, current version 800.0.0) - /usr/lib/libc++.1.dylib (compatibility version 1.0.0, current version 1200.3.0) - /usr/lib/libSystem.B.dylib (compatibility version 1.0.0, current version 1311.0.0) + # Validate libjniCDataInterfaceLibrary.dylib created + $ otool -L macosx-x86_64/libjniCDataJavaToCppExample.dylib + macosx-x86_64/libjniCDataJavaToCppExample.dylib: + libjniCDataJavaToCppExample.dylib (compatibility version 0.0.0, current version 0.0.0) + @rpath/libarrow.800.dylib (compatibility version 800.0.0, current version 800.0.0) + /usr/lib/libc++.1.dylib (compatibility version 1.0.0, current version 1200.3.0) + /usr/lib/libSystem.B.dylib (compatibility version 1.0.0, current version 1311.0.0) **Java Test** @@ -190,34 +191,280 @@ Let's create a Java class to test our bridge: .. 
code-block:: java - import org.apache.arrow.c.ArrowArray; - import org.apache.arrow.c.ArrowSchema; - import org.apache.arrow.c.Data; - import org.apache.arrow.memory.BufferAllocator; - import org.apache.arrow.memory.RootAllocator; - import org.apache.arrow.vector.BigIntVector; - - public class TestCDataInterface { - public static void main(String[] args) { - try( - BufferAllocator allocator = new RootAllocator(); - ArrowSchema arrowSchema = ArrowSchema.allocateNew(allocator); - ArrowArray arrowArray = ArrowArray.allocateNew(allocator) - ){ - CDataJavaToCppExample.FillInt64Array( - arrowSchema.memoryAddress(), arrowArray.memoryAddress()); - try( - BigIntVector bigIntVector = (BigIntVector) Data.importVector( - allocator, arrowArray, arrowSchema, null) - ){ - System.out.println("C++-allocated array: " + bigIntVector); - } - } - } - } + import org.apache.arrow.c.ArrowArray; + import org.apache.arrow.c.ArrowSchema; + import org.apache.arrow.c.Data; + import org.apache.arrow.memory.BufferAllocator; + import org.apache.arrow.memory.RootAllocator; + import org.apache.arrow.vector.BigIntVector; + + public class TestCDataInterface { + public static void main(String[] args) { + try( + BufferAllocator allocator = new RootAllocator(); + ArrowSchema arrowSchema = ArrowSchema.allocateNew(allocator); + ArrowArray arrowArray = ArrowArray.allocateNew(allocator) + ){ + CDataJavaToCppExample.FillInt64Array( + arrowSchema.memoryAddress(), arrowArray.memoryAddress()); + try( + BigIntVector bigIntVector = (BigIntVector) Data.importVector( + allocator, arrowArray, arrowSchema, null) + ){ + System.out.println("C++-allocated array: " + bigIntVector); + } + } + } + } + +.. code-block:: shell + + C++-allocated array: [1, 2, 3, null, 5, 6, 7, 8, 9, 10] + +Share an Int32 array from Java to C++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Java Side** + +For this example, we will build a JAR with all dependencies bundled. + +.. code-block:: xml + + + + 4.0.0 + org.example + cpptojava + 1.0-SNAPSHOT + + 8 + 8 + 9.0.0 + + + + org.apache.arrow + arrow-c-data + ${arrow.version} + + + org.apache.arrow + arrow-memory-netty + ${arrow.version} + + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + package + + single + + + + jar-with-dependencies + + + + + + + + + +.. 
code-block:: java + + import org.apache.arrow.c.ArrowArray; + import org.apache.arrow.c.ArrowSchema; + import org.apache.arrow.c.Data; + import org.apache.arrow.memory.BufferAllocator; + import org.apache.arrow.memory.RootAllocator; + import org.apache.arrow.vector.FieldVector; + import org.apache.arrow.vector.IntVector; + import org.apache.arrow.vector.VectorSchemaRoot; + + import java.util.Arrays; + + public class ToBeCalledByCpp { + final static BufferAllocator allocator = new RootAllocator(); + + /** + * Create a {@link FieldVector} and export it via the C Data Interface + * @param schemaAddress Schema memory address to wrap + * @param arrayAddress Array memory address to wrap + */ + public static void fillVector(long schemaAddress, long arrayAddress){ + try (ArrowArray arrow_array = ArrowArray.wrap(arrayAddress); + ArrowSchema arrow_schema = ArrowSchema.wrap(schemaAddress) ) { + Data.exportVector(allocator, populateFieldVectorToExport(), null, arrow_array, arrow_schema); + } + } + + /** + * Create a {@link VectorSchemaRoot} and export it via the C Data Interface + * @param schemaAddress Schema memory address to wrap + * @param arrayAddress Array memory address to wrap + */ + public static void fillVectorSchemaRoot(long schemaAddress, long arrayAddress){ + try (ArrowArray arrow_array = ArrowArray.wrap(arrayAddress); + ArrowSchema arrow_schema = ArrowSchema.wrap(schemaAddress) ) { + Data.exportVectorSchemaRoot(allocator, populateVectorSchemaRootToExport(), null, arrow_array, arrow_schema); + } + } + + private static FieldVector populateFieldVectorToExport(){ + IntVector intVector = new IntVector("int-to-export", allocator); + intVector.allocateNew(3); + intVector.setSafe(0, 1); + intVector.setSafe(1, 2); + intVector.setSafe(2, 3); + intVector.setValueCount(3); + System.out.println("[Java] FieldVector: \n" + intVector); + return intVector; + } + + private static VectorSchemaRoot populateVectorSchemaRootToExport(){ + IntVector intVector = new IntVector("age-to-export", allocator); + intVector.setSafe(0, 10); + intVector.setSafe(1, 20); + intVector.setSafe(2, 30); + VectorSchemaRoot root = new VectorSchemaRoot(Arrays.asList(intVector)); + root.setRowCount(3); + System.out.println("[Java] VectorSchemaRoot: \n" + root.contentToTSVString()); + return root; + } + } + +Build the JAR and copy it to the C++ project. .. code-block:: shell - C++-allocated array: [1, 2, 3, null, 5, 6, 7, 8, 9, 10] + $ mvn clean install + $ cp target/cpptojava-1.0-SNAPSHOT-jar-with-dependencies.jar /cpptojava.jar + +**C++ Side** + +This application uses JNI to call Java code, but transfers data (zero-copy) via the C Data Interface instead. + +.. 
code-block:: cpp
+
+    #include <iostream>
+    #include <jni.h>
+
+    #include <arrow/api.h>
+    #include <arrow/c/bridge.h>
+
+    JNIEnv *CreateVM(JavaVM **jvm) {
+        JNIEnv *env;
+        JavaVMInitArgs vm_args;
+        JavaVMOption options[2];
+        options[0].optionString = "-Djava.class.path=cpptojava.jar";
+        options[1].optionString = "-DXcheck:jni:pedantic";
+        vm_args.version = JNI_VERSION_1_8;
+        vm_args.nOptions = 2;
+        vm_args.options = options;
+        int status = JNI_CreateJavaVM(jvm, (void **) &env, &vm_args);
+        if (status < 0) {
+            std::cerr << "\n<<<<< Unable to Launch JVM >>>>>\n" << std::endl;
+            return nullptr;
+        }
+        return env;
+    }
+
+    int main() {
+        JNIEnv *env;
+        JavaVM *jvm;
+        env = CreateVM(&jvm);
+        if (env == nullptr) return EXIT_FAILURE;
+        jclass javaClassToBeCalledByCpp = env->FindClass("ToBeCalledByCpp");
+        if (javaClassToBeCalledByCpp != nullptr) {
+            jmethodID fillVector = env->GetStaticMethodID(javaClassToBeCalledByCpp,
+                                                          "fillVector",
+                                                          "(JJ)V");
+            if (fillVector != nullptr) {
+                struct ArrowSchema arrowSchema;
+                struct ArrowArray arrowArray;
+                std::cout << "\n<<<<< C++ to Java for Arrays >>>>>\n" << std::endl;
+                env->CallStaticVoidMethod(javaClassToBeCalledByCpp, fillVector,
+                                          static_cast<jlong>(reinterpret_cast<uintptr_t>(&arrowSchema)),
+                                          static_cast<jlong>(reinterpret_cast<uintptr_t>(&arrowArray)));
+                auto resultImportArray = arrow::ImportArray(&arrowArray, &arrowSchema);
+                std::shared_ptr<arrow::Array> array = resultImportArray.ValueOrDie();
+                std::cout << "[C++] Array: " << array->ToString() << std::endl;
+            } else {
+                std::cerr << "Could not find fillVector method\n" << std::endl;
+                return EXIT_FAILURE;
+            }
+            jmethodID fillVectorSchemaRoot = env->GetStaticMethodID(javaClassToBeCalledByCpp,
+                                                                    "fillVectorSchemaRoot",
+                                                                    "(JJ)V");
+            if (fillVectorSchemaRoot != nullptr) {
+                struct ArrowSchema arrowSchema;
+                struct ArrowArray arrowArray;
+                std::cout << "\n<<<<< C++ to Java for RecordBatch >>>>>\n" << std::endl;
+                env->CallStaticVoidMethod(javaClassToBeCalledByCpp, fillVectorSchemaRoot,
+                                          static_cast<jlong>(reinterpret_cast<uintptr_t>(&arrowSchema)),
+                                          static_cast<jlong>(reinterpret_cast<uintptr_t>(&arrowArray)));
+                auto resultImportVectorSchemaRoot = arrow::ImportRecordBatch(&arrowArray, &arrowSchema);
+                std::shared_ptr<arrow::RecordBatch> recordBatch = resultImportVectorSchemaRoot.ValueOrDie();
+                std::cout << "[C++] RecordBatch: " << recordBatch->ToString() << std::endl;
+            } else {
+                std::cerr << "Could not find fillVectorSchemaRoot method\n" << std::endl;
+                return EXIT_FAILURE;
+            }
+        } else {
+            std::cout << "Could not find ToBeCalledByCpp class\n" << std::endl;
+            return EXIT_FAILURE;
+        }
+        jvm->DestroyJavaVM();
+        return EXIT_SUCCESS;
+    }
+
+CMakeLists.txt definition file:
+
+.. code-block:: cmake
+
+    cmake_minimum_required(VERSION 3.19)
+    project(cdatacpptojava)
+    find_package(JNI REQUIRED)
+    find_package(Arrow REQUIRED)
+    message(STATUS "Arrow version: ${ARROW_VERSION}")
+    include_directories(${JNI_INCLUDE_DIRS})
+    set(CMAKE_CXX_STANDARD 11)
+    add_executable(${PROJECT_NAME} main.cpp)
+    target_link_libraries(cdatacpptojava PRIVATE arrow_shared)
+    target_link_libraries(cdatacpptojava PRIVATE ${JNI_LIBRARIES})
+
+**Result**
+
+.. code-block:: text
+
+    <<<<< C++ to Java for Arrays >>>>>
+    [Java] FieldVector:
+    [1, 2, 3]
+    [C++] Array: [
+      1,
+      2,
+      3
+    ]
+
+    <<<<< C++ to Java for RecordBatch >>>>>
+    [Java] VectorSchemaRoot:
+    age-to-export
+    10
+    20
+    30
+
+    [C++] RecordBatch: age-to-export: [
+      10,
+      20,
+      30
+    ]

..
_`JavaCPP`: https://github.com/bytedeco/javacpp diff --git a/docs/source/java/install.rst b/docs/source/java/install.rst index 2951dc39594ed..9eaf2b5883415 100644 --- a/docs/source/java/install.rst +++ b/docs/source/java/install.rst @@ -55,7 +55,7 @@ arrow-vector, and arrow-memory-netty. demo 1.0-SNAPSHOT - 8.0.0 + 9.0.0 @@ -87,7 +87,7 @@ transitive dependencies of Flight. demo 1.0-SNAPSHOT - 8.0.0 + 9.0.0 @@ -134,158 +134,3 @@ Installing from Source ---------------------- See :ref:`java-development`. - -Installing Nightly Packages ---------------------------- - -.. warning:: - These packages are not official releases. Use them at your own risk. - -Arrow nightly builds are posted on the mailing list at `builds@arrow.apache.org`_. -The artifacts are uploaded to GitHub. For example, for 2022/03/01, they can be found at `Github Nightly`_. - -Installing from Apache Nightlies -******************************** -1. Look up the nightly version number for the Arrow libraries used. - - For example, for ``arrow-memory``, visit https://nightlies.apache.org/arrow/java/org/apache/arrow/arrow-memory/ and see what versions are available (e.g. 9.0.0.dev191). -2. Add Apache Nightlies Repository to the Maven/Gradle project. - -.. code-block:: xml - - - 9.0.0.dev191 - - ... - - - arrow-apache-nightlies - https://nightlies.apache.org/arrow/java - - - ... - - - org.apache.arrow - arrow-vector - ${arrow.version} - - - ... - -Installing Manually -******************* - -1. Decide nightly packages repository to use, for example: https://github.com/ursacomputing/crossbow/releases/tag/nightly-2022-03-19-0-github-java-jars -2. Add packages to your pom.xml, for example: flight-core (it depends on: arrow-format, arrow-vector, arrow-memeory-core and arrow-memory-netty). - -.. code-block:: xml - - - 8 - 8 - 8.0.0.dev254 - - - - - org.apache.arrow - flight-core - ${arrow.version} - - - -3. Download the necessary pom and jar files to a temporary directory: - -.. 
code-block:: shell - - $ mkdir nightly-2022-03-19-0-github-java-jars - $ cd nightly-2022-03-19-0-github-java-jars - $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-2022-03-19-0-github-java-jars/arrow-java-root-8.0.0.dev254.pom - $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-2022-03-19-0-github-java-jars/arrow-format-8.0.0.dev254.pom - $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-2022-03-19-0-github-java-jars/arrow-format-8.0.0.dev254.jar - $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-2022-03-19-0-github-java-jars/arrow-vector-8.0.0.dev254.pom - $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-2022-03-19-0-github-java-jars/arrow-vector-8.0.0.dev254.jar - $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-2022-03-19-0-github-java-jars/arrow-memory-8.0.0.dev254.pom - $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-2022-03-19-0-github-java-jars/arrow-memory-core-8.0.0.dev254.pom - $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-2022-03-19-0-github-java-jars/arrow-memory-netty-8.0.0.dev254.pom - $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-2022-03-19-0-github-java-jars/arrow-memory-core-8.0.0.dev254.jar - $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-2022-03-19-0-github-java-jars/arrow-memory-netty-8.0.0.dev254.jar - $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-2022-03-19-0-github-java-jars/arrow-flight-8.0.0.dev254.pom - $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-2022-03-19-0-github-java-jars/flight-core-8.0.0.dev254.pom - $ wget https://github.com/ursacomputing/crossbow/releases/download/nightly-2022-03-19-0-github-java-jars/flight-core-8.0.0.dev254.jar - $ tree - . - ├── arrow-flight-8.0.0.dev254.pom - ├── arrow-format-8.0.0.dev254.jar - ├── arrow-format-8.0.0.dev254.pom - ├── arrow-java-root-8.0.0.dev254.pom - ├── arrow-memory-8.0.0.dev254.pom - ├── arrow-memory-core-8.0.0.dev254.jar - ├── arrow-memory-core-8.0.0.dev254.pom - ├── arrow-memory-netty-8.0.0.dev254.jar - ├── arrow-memory-netty-8.0.0.dev254.pom - ├── arrow-vector-8.0.0.dev254.jar - ├── arrow-vector-8.0.0.dev254.pom - ├── flight-core-8.0.0.dev254.jar - └── flight-core-8.0.0.dev254.pom - -4. Install the artifacts to the local Maven repository with ``mvn install:install-file``: - -.. 
code-block:: shell - - $ mvn install:install-file -Dfile="$(pwd)/arrow-java-root-8.0.0.dev254.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-java-root -Dversion=8.0.0.dev254 -Dpackaging=pom - $ mvn install:install-file -Dfile="$(pwd)/arrow-format-8.0.0.dev254.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-format -Dversion=8.0.0.dev254 -Dpackaging=pom - $ mvn install:install-file -Dfile="$(pwd)/arrow-format-8.0.0.dev254.jar" -DgroupId=org.apache.arrow -DartifactId=arrow-format -Dversion=8.0.0.dev254 -Dpackaging=jar - $ mvn install:install-file -Dfile="$(pwd)/arrow-vector-8.0.0.dev254.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-vector -Dversion=8.0.0.dev254 -Dpackaging=pom - $ mvn install:install-file -Dfile="$(pwd)/arrow-vector-8.0.0.dev254.jar" -DgroupId=org.apache.arrow -DartifactId=arrow-vector -Dversion=8.0.0.dev254 -Dpackaging=jar - $ mvn install:install-file -Dfile="$(pwd)/arrow-memory-8.0.0.dev254.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-memory -Dversion=8.0.0.dev254 -Dpackaging=pom - $ mvn install:install-file -Dfile="$(pwd)/arrow-memory-core-8.0.0.dev254.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-memory-core -Dversion=8.0.0.dev254 -Dpackaging=pom - $ mvn install:install-file -Dfile="$(pwd)/arrow-memory-netty-8.0.0.dev254.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-memory-netty -Dversion=8.0.0.dev254 -Dpackaging=pom - $ mvn install:install-file -Dfile="$(pwd)/arrow-memory-core-8.0.0.dev254.jar" -DgroupId=org.apache.arrow -DartifactId=arrow-memory-core -Dversion=8.0.0.dev254 -Dpackaging=jar - $ mvn install:install-file -Dfile="$(pwd)/arrow-memory-netty-8.0.0.dev254.jar" -DgroupId=org.apache.arrow -DartifactId=arrow-memory-netty -Dversion=8.0.0.dev254 -Dpackaging=jar - $ mvn install:install-file -Dfile="$(pwd)/arrow-flight-8.0.0.dev254.pom" -DgroupId=org.apache.arrow -DartifactId=arrow-flight -Dversion=8.0.0.dev254 -Dpackaging=pom - $ mvn install:install-file -Dfile="$(pwd)/flight-core-8.0.0.dev254.pom" -DgroupId=org.apache.arrow -DartifactId=flight-core -Dversion=8.0.0.dev254 -Dpackaging=pom - $ mvn install:install-file -Dfile="$(pwd)/flight-core-8.0.0.dev254.jar" -DgroupId=org.apache.arrow -DartifactId=flight-core -Dversion=8.0.0.dev254 -Dpackaging=jar - -5. Validate that the packages were installed: - -.. code-block:: shell - - $ tree ~/.m2/repository/org/apache/arrow - . - ├── arrow-flight - │   ├── 8.0.0.dev254 - │   │   └── arrow-flight-8.0.0.dev254.pom - ├── arrow-format - │   ├── 8.0.0.dev254 - │   │   ├── arrow-format-8.0.0.dev254.jar - │   │   └── arrow-format-8.0.0.dev254.pom - ├── arrow-java-root - │   ├── 8.0.0.dev254 - │   │   └── arrow-java-root-8.0.0.dev254.pom - ├── arrow-memory - │   ├── 8.0.0.dev254 - │   │   └── arrow-memory-8.0.0.dev254.pom - ├── arrow-memory-core - │   ├── 8.0.0.dev254 - │   │   ├── arrow-memory-core-8.0.0.dev254.jar - │   │   └── arrow-memory-core-8.0.0.dev254.pom - ├── arrow-memory-netty - │   ├── 8.0.0.dev254 - │   │   ├── arrow-memory-netty-8.0.0.dev254.jar - │   │   └── arrow-memory-netty-8.0.0.dev254.pom - ├── arrow-vector - │   ├── 8.0.0.dev254 - │   │   ├── _remote.repositories - │   │   ├── arrow-vector-8.0.0.dev254.jar - │   │   └── arrow-vector-8.0.0.dev254.pom - └── flight-core - ├── 8.0.0.dev254 - │   ├── flight-core-8.0.0.dev254.jar - │   └── flight-core-8.0.0.dev254.pom - -6. Compile your project like usual with ``mvn clean install``. - -.. _builds@arrow.apache.org: https://lists.apache.org/list.html?builds@arrow.apache.org -.. 
_Github Nightly: https://github.com/ursacomputing/crossbow/releases/tag/nightly-2022-03-19-0-github-java-jars \ No newline at end of file diff --git a/docs/source/python/install.rst b/docs/source/python/install.rst index d47a0970e934f..f884a9cc94b36 100644 --- a/docs/source/python/install.rst +++ b/docs/source/python/install.rst @@ -61,30 +61,3 @@ Installing from source ---------------------- See :ref:`python-development`. - -Installing Nightly Packages ---------------------------- - -.. warning:: - These packages are not official releases. Use them at your own risk. - -PyArrow has nightly wheels and conda packages for testing purposes. - -These may be suitable for downstream libraries in their continuous integration -setup to maintain compatibility with the upcoming PyArrow features, -deprecations and/or feature removals. - -Install the development version of PyArrow from `arrow-nightlies -`_ conda channel: - -.. code-block:: bash - - conda install -c arrow-nightlies pyarrow - -Install the development version from an `alternative PyPI -`_ index: - -.. code-block:: bash - - pip install --extra-index-url https://pypi.fury.io/arrow-nightlies/ \ - --prefer-binary --pre pyarrow diff --git a/docs/source/status.rst b/docs/source/status.rst index 3c35a582c6524..64e6b6923ffaa 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -40,13 +40,13 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+ | UInt8/16/32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ -| Float16 | | | ✓ | | | | ✓ | +| Float16 | | | ✓ | | | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ | Float32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ | Decimal128 | ✓ | ✓ | ✓ | | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ -| Decimal256 | ✓ | ✓ | | | ✓ | | ✓ | +| Decimal256 | ✓ | ✓ | ✓ | | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ | Date32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ @@ -77,15 +77,15 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+ | List | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ -| Large List | ✓ | ✓ | | | | ✓ | ✓ | +| Large List | ✓ | ✓ | ✓ | | | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ | Struct | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ -| Map | ✓ | ✓ | ✓ | ✓ | | | ✓ | +| Map | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ -| Dense Union | ✓ | ✓ | | | | | ✓ | +| Dense Union | ✓ | ✓ | ✓ | | | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ -| Sparse Union | ✓ | ✓ | | | | | ✓ | +| Sparse Union | ✓ | ✓ | ✓ | | | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ +-------------------+-------+-------+-------+------------+-------+-------+-------+ @@ -94,7 +94,7 @@ Data Types +===================+=======+=======+=======+============+=======+=======+=======+ | Dictionary | ✓ | ✓ (1) | ✓ | ✓ (1) | ✓ (1) | ✓ (1) | ✓ | 
+-------------------+-------+-------+-------+------------+-------+-------+-------+ -| Extension | ✓ | ✓ | ✓ | | | | ✓ | +| Extension | ✓ | ✓ | ✓ | | | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ Notes: @@ -128,9 +128,9 @@ IPC Format +-----------------------------+-------+-------+-------+------------+-------+-------+-------+ | Sparse tensors | ✓ | | | | | | | +-----------------------------+-------+-------+-------+------------+-------+-------+-------+ -| Buffer compression | ✓ | ✓ (3) | ✓ | | | | ✓ | +| Buffer compression | ✓ | ✓ (3) | ✓ | | | ✓ | ✓ | +-----------------------------+-------+-------+-------+------------+-------+-------+-------+ -| Endianness conversion | ✓ (2) | | | | | | | +| Endianness conversion | ✓ (2) | | ✓ (2) | | | | | +-----------------------------+-------+-------+-------+------------+-------+-------+-------+ | Custom schema metadata | ✓ | ✓ | ✓ | | ✓ | ✓ | ✓ | +-----------------------------+-------+-------+-------+------------+-------+-------+-------+ @@ -156,11 +156,11 @@ Flight RPC +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+ | Flight RPC Transport | C++ | Java | Go | JavaScript | C# | Rust | Julia | +============================================+=======+=======+=======+============+=======+=======+=======+ -| gRPC_ transport (grpc:, grpc+tcp:) | ✓ | ✓ | ✓ | | ✓ | | | +| gRPC_ transport (grpc:, grpc+tcp:) | ✓ | ✓ | ✓ | | ✓ | ✓ | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+ -| gRPC domain socket transport (grpc+unix:) | ✓ | ✓ | ✓ | | ✓ | | | +| gRPC domain socket transport (grpc+unix:) | ✓ | ✓ | ✓ | | ✓ | ✓ | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+ -| gRPC + TLS transport (grpc+tls:) | ✓ | ✓ | ✓ | | ✓ | | | +| gRPC + TLS transport (grpc+tls:) | ✓ | ✓ | ✓ | | ✓ | ✓ | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+ | UCX_ transport (ucx:) | ✓ | | | | | | | +--------------------------------------------+-------+-------+-------+------------+-------+-------+-------+ @@ -249,7 +249,7 @@ C Stream Interface | Feature | C++ | Python | R | Rust | Go | Java | C/GLib | Ruby | Julia | | | | | | | | | | | | +=============================+=====+========+===+======+====+======+========+======+=======+ -| Stream export | ✓ | ✓ | ✓ | ✓ | | | ✓ | ✓ | | +| Stream export | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | +-----------------------------+-----+--------+---+------+----+------+--------+------+-------+ | Stream import | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | +-----------------------------+-----+--------+---+------+----+------+--------+------+-------+ @@ -261,18 +261,18 @@ C Stream Interface Third-Party Data Formats ======================== -+-----------------------------+---------+---------+-------+------------+-------+---------+-------+ -| Format | C++ | Java | Go | JavaScript | C# | Rust | Julia | -| | | | | | | | | -+=============================+=========+=========+=======+============+=======+=========+=======+ -| Avro | | R | | | | | | -+-----------------------------+---------+---------+-------+------------+-------+---------+-------+ -| CSV | R/W | | R/W | | | R/W | R/W | -+-----------------------------+---------+---------+-------+------------+-------+---------+-------+ -| ORC | R/W | R (2) | | | | | | 
-+-----------------------------+---------+---------+-------+------------+-------+---------+-------+ -| Parquet | R/W | R (3) | R/W | | | R/W (1) | | -+-----------------------------+---------+---------+-------+------------+-------+---------+-------+ ++-----------------------------+---------+---------+-------+------------+-------+-------+-------+ +| Format | C++ | Java | Go | JavaScript | C# | Rust | Julia | +| | | | | | | | | ++=============================+=========+=========+=======+============+=======+=======+=======+ +| Avro | | R | | | | | | ++-----------------------------+---------+---------+-------+------------+-------+-------+-------+ +| CSV | R/W | | R/W | | | R/W | R/W | ++-----------------------------+---------+---------+-------+------------+-------+-------+-------+ +| ORC | R/W | R (1) | | | | | | ++-----------------------------+---------+---------+-------+------------+-------+-------+-------+ +| Parquet | R/W | R (2) | R/W | | | R/W | | ++-----------------------------+---------+---------+-------+------------+-------+-------+-------+ Notes: @@ -280,8 +280,6 @@ Notes: * *W* = Write supported -* \(1) Nested read/write not supported. +* \(1) Through JNI bindings. (Provided by ``org.apache.arrow.orc:arrow-orc``) -* \(2) Through JNI bindings. (Provided by ``org.apache.arrow.orc:arrow-orc``) - -* \(3) Through JNI bindings to Arrow C++ Datasets. (Provided by ``org.apache.arrow:arrow-dataset``) +* \(2) Through JNI bindings to Arrow C++ Datasets. (Provided by ``org.apache.arrow:arrow-dataset``) diff --git a/experimental/computeir/Expression.fbs b/experimental/computeir/Expression.fbs deleted file mode 100644 index e3a7fb4eb0378..0000000000000 --- a/experimental/computeir/Expression.fbs +++ /dev/null @@ -1,222 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -include "../../format/Schema.fbs"; -include "Literal.fbs"; - -namespace org.apache.arrow.computeir.flatbuf; - -/// Access a value for a given map key -table MapKey { - /// Any expression can be a map key. - key: Expression (required); -} - -/// Struct field access -table StructField { - /// The position of the field in the struct schema - position: uint32; -} - -/// Zero-based array index -table ArraySubscript { - position: uint32; -} - -/// Zero-based range of elements in an array -table ArraySlice { - /// The start of an array slice, inclusive - start_inclusive: uint32; - /// The end of an array slice, exclusive - end_exclusive: uint32; -} - -/// Field name in a relation, in ordinal position of the relation's schema. 
-table FieldIndex { - position: uint32; -} - -/// A union of possible dereference operations -union Deref { - /// Access a value for a given map key - MapKey, - /// Access the value at a struct field - StructField, - /// Access the element at a given index in an array - ArraySubscript, - /// Access a range of elements in an array - ArraySlice, - /// Access a field of a relation - FieldIndex, -} - -/// Access the data of a field -table FieldRef { - ref: Deref (required); - /// For Expressions which might reference fields in multiple Relations, - /// this index may be provided to indicate which Relation's fields - /// `ref` points into. For example in the case of a join, - /// 0 refers to the left relation and 1 to the right relation. - relation_index: int = 0; -} - -/// A function call expression -table Call { - /// The function to call - name: string (required); - - /// The arguments passed to `name`. - arguments: [Expression] (required); - - /// Possible ordering of input. These are useful - /// in aggregates where ordering in meaningful such as - /// string concatenation - orderings: [SortKey]; -} - -/// A single WHEN x THEN y fragment. -table CaseFragment { - match: Expression (required); - result: Expression (required); -} - -/// Conditional case statement expression -table ConditionalCase { - /// List of conditions to evaluate - conditions: [CaseFragment] (required); - /// The default value if no cases match. This is typically NULL in SQL - /// implementations. - /// - /// Defaulting to NULL is a frontend choice, so producers must specify NULL - /// if that's their desired behavior. - else: Expression (required); -} - -/// Switch-style case expression -table SimpleCase { - /// The expression whose value will be matched - expression: Expression (required); - /// Matches for `expression` - matches: [CaseFragment] (required); - /// The default value if no cases match - else: Expression (required); -} - -/// Whether lesser values should precede greater or vice versa, -/// also whether nulls should preced or follow values -enum Ordering : uint8 { - ASCENDING_THEN_NULLS, - DESCENDING_THEN_NULLS, - NULLS_THEN_ASCENDING, - NULLS_THEN_DESCENDING, -} - -/// An expression with an order -table SortKey { - expression: Expression (required); - ordering: Ordering = ASCENDING_THEN_NULLS; -} - -/// An unbounded window bound -table Unbounded {} - -/// A concrete bound, which can be an expression or unbounded -union ConcreteBoundImpl { - Expression, - Unbounded, -} - -/// Boundary is preceding rows, determined by the contained expression -table Preceding { - impl: ConcreteBoundImpl (required); -} - -/// Boundary is following rows, determined by the contained expression -table Following { - impl: ConcreteBoundImpl (required); -} - -/// Boundary is the current row -table CurrentRow {} - -union Bound { - Preceding, - Following, - CurrentRow, -} - -/// The kind of window function to be executed -enum Frame : uint8 { - Rows, - Range, -} - -/// An expression representing a window function call. -table WindowCall { - /// The expression to operate over - expression: Expression (required); - /// The kind of window frame - kind: Frame; - /// Partition keys - partitions: [Expression] (required); - /// Sort keys - orderings: [SortKey] (required); - /// Lower window bound - lower_bound: Bound (required); - /// Upper window bound - upper_bound: Bound (required); -} - -/// A cast expression -table Cast { - /// The expression to cast - operand: Expression (required); - /// The type to cast to. 
This value is a `Field` to allow complete representation - /// of arrow types. - /// - /// `Type` is unable to completely represent complex types like lists and - /// maps. - to: org.apache.arrow.flatbuf.Field (required); -} - -/// Various expression types -/// -/// WindowCall is a separate variant -/// due to special options for each that don't apply to generic -/// function calls. Again this is done to make it easier -/// for consumers to deal with the structure of the operation -union ExpressionImpl { - Literal, - FieldRef, - Call, - ConditionalCase, - SimpleCase, - WindowCall, - Cast, -} - -/// Expression types -/// -/// Expressions have a concrete `impl` value, which is a specific operation. -/// -/// This is a workaround for flatbuffers' lack of support for direct use of -/// union types. -table Expression { - impl: ExpressionImpl (required); -} - -root_type Expression; diff --git a/experimental/computeir/Literal.fbs b/experimental/computeir/Literal.fbs deleted file mode 100644 index a966b6ece3fc0..0000000000000 --- a/experimental/computeir/Literal.fbs +++ /dev/null @@ -1,184 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -include "../../format/Schema.fbs"; - -namespace org.apache.arrow.computeir.flatbuf; - -table ListLiteral { - values: [Literal] (required); -} - -table StructLiteral { - /// Values for each struct field; the order must match the order of fields - /// in the `type` field of `Literal`. - values: [Literal] (required); -} - -table KeyValue { - key: Literal (required); - value: Literal (required); -} - -table MapLiteral { - values: [KeyValue] (required); -} - -table Int8Literal { - value: int8; -} - -table Int16Literal { - value: int16; -} - -table Int32Literal { - value: int32; -} - -table Int64Literal { - value: int64; -} - -table UInt8Literal { - value: uint8; -} - -table UInt16Literal { - value: uint16; -} - -table UInt32Literal { - value: uint32; -} - -table UInt64Literal { - value: uint64; -} - -table Float16Literal { - value: uint16; -} - -table Float32Literal { - value: float32; -} - -table Float64Literal { - value: float64; -} - -table DecimalLiteral { - /// Bytes of a Decimal value; bytes must be in little-endian order. 
- value: [byte] (required); -} - -table BooleanLiteral { - value: bool; -} - -table DateLiteral { - value: int64; -} - -table TimeLiteral { - value: int64; -} - -table TimestampLiteral { - value: int64; -} - -table IntervalLiteralMonths { - months: int32; -} - -table IntervalLiteralDaysMilliseconds { - days: int32; - milliseconds: int32; -} - -union IntervalLiteralImpl { - IntervalLiteralMonths, - IntervalLiteralDaysMilliseconds, -} - -table IntervalLiteral { - value: IntervalLiteralImpl (required); -} - -table DurationLiteral { - value: int64; -} - -table BinaryLiteral { - value: [byte] (required); -} - -table FixedSizeBinaryLiteral { - value: [byte] (required); -} - -table StringLiteral { - value: string (required); -} - -// no union literal is defined as only one branch of a union can be resolved. -// no literals for large string/binary types as flatbuffer is limited to 2gb. - -union LiteralImpl { - BooleanLiteral, - - Int8Literal, - Int16Literal, - Int32Literal, - Int64Literal, - - UInt8Literal, - UInt16Literal, - UInt32Literal, - UInt64Literal, - - DateLiteral, - TimeLiteral, - TimestampLiteral, - IntervalLiteral, - DurationLiteral, - - DecimalLiteral, - - Float16Literal, - Float32Literal, - Float64Literal, - - ListLiteral, - StructLiteral, - MapLiteral, - - StringLiteral, - BinaryLiteral, - FixedSizeBinaryLiteral, -} - -table Literal { - /// Literal value data; for null literals do not include this field. - impl: LiteralImpl; - /// Type of the literal value. This must match `impl`. - type: org.apache.arrow.flatbuf.Field (required); -} - -root_type Literal; diff --git a/experimental/computeir/Relation.fbs b/experimental/computeir/Relation.fbs deleted file mode 100644 index 308dcdb9ae17a..0000000000000 --- a/experimental/computeir/Relation.fbs +++ /dev/null @@ -1,218 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -include "../../format/Schema.fbs"; -include "Literal.fbs"; -include "Expression.fbs"; - -namespace org.apache.arrow.computeir.flatbuf; - -/// An identifier for relations in a query. -/// -/// A table is used here to allow plan implementations optionality. -table RelId { - id: uint64; -} - -/// Filter operation -table Filter { - /// An identifiier for the relation. The identifier should be unique over the - /// entire plan. Optional. - id: RelId; - /// Child relation - rel: Relation (required); - /// The expression which will be evaluated against input rows - /// to determine whether they should be excluded from the - /// filter relation's output. - predicate: Expression (required); -} - -/// Projection -table Project { - /// An identifiier for the relation. The identifier should be unique over the - /// entire plan. Optional. 
- id: RelId; - /// Child relation - rel: Relation (required); - /// Expressions which will be evaluated to produce to - /// the rows of the project relation's output. - expressions: [Expression] (required); -} - -/// A set of grouping keys -table Grouping { - /// Expressions to group by - keys: [Expression] (required); -} - -/// Aggregate operation -table Aggregate { - /// An identifiier for the relation. The identifier should be unique over the - /// entire plan. Optional. - id: RelId; - /// Child relation - rel: Relation (required); - /// Expressions which will be evaluated to produce to - /// the rows of the aggregate relation's output. - measures: [Expression] (required); - /// Keys by which `aggregations` will be grouped. - /// - /// The nested list here is to support grouping sets - /// eg - /// - /// SELECT a, b, c, sum(d) - /// FROM t - /// GROUP BY - /// GROUPING SETS ( - /// (a, b, c), - /// (a, b), - /// (a), - /// () - /// ); - groupings: [Grouping] (required); -} - -enum JoinKind : uint8 { - Anti, - Cross, - FullOuter, - Inner, - LeftOuter, - LeftSemi, - RightOuter, -} - -/// Join between two tables -table Join { - /// An identifiier for the relation. The identifier should be unique over the - /// entire plan. Optional. - id: RelId; - /// Left relation - left: Relation (required); - /// Right relation - right: Relation (required); - /// The expression which will be evaluated against rows from each - /// input to determine whether they should be included in the - /// join relation's output. - on_expression: Expression (required); - /// The kind of join to use. - join_kind: JoinKind; -} - -/// Order by relation -table OrderBy { - /// An identifiier for the relation. The identifier should be unique over the - /// entire plan. Optional. - id: RelId; - /// Child relation - rel: Relation (required); - /// Define sort order for rows of output. - /// Keys with higher precedence are ordered ahead of other keys. - keys: [SortKey] (required); -} - -/// Limit operation -table Limit { - /// An identifiier for the relation. The identifier should be unique over the - /// entire plan. Optional. - id: RelId; - /// Child relation - rel: Relation (required); - /// Starting index of rows - offset: uint32; - /// The maximum number of rows of output. - count: uint32; -} - -/// The kind of set operation being performed. -enum SetOpKind : uint8 { - Union, - Intersection, - Difference, -} - -/// A set operation on two or more relations -table SetOperation { - /// An identifiier for the relation. The identifier should be unique over the - /// entire plan. Optional. - id: RelId; - /// Child relations - rels: [Relation] (required); - /// The kind of set operation - set_op: SetOpKind; -} - -/// A single column of literal values. -table LiteralColumn { - /// The literal values of the column - elements: [Literal] (required); -} - -/// Literal relation -table LiteralRelation { - /// An identifiier for the relation. The identifier should be unique over the - /// entire plan. Optional. - id: RelId; - /// The columns of this literal relation. - columns: [LiteralColumn] (required); -} - -/// An external source of tabular data -table Source { - /// An identifiier for the relation. The identifier should be unique over the - /// entire plan. Optional. - id: RelId; - name: string (required); - /// An optional expression used to filter out rows directly from the source. - /// - /// Useful for consumers that implement predicate pushdown. 
- /// - /// A missing filter value indicates no filter, i.e., all rows are - /// returned from the source. - filter: Expression; - /// Schemas are explicitly optional - schema: org.apache.arrow.flatbuf.Schema; - /// An optional list of field indices indicating which columns should be read - /// from the source. Columns excluded from this listing will instead be replaced - /// with all-null placeholders to guarantee that the schema of the source is - /// unaffected by this projection. - /// - /// A missing value indicates all columns should be read. - /// - /// The behavior of an empty list is undefined. - projection: [FieldIndex]; -} - -/// The varieties of relations -union RelationImpl { - Aggregate, - Filter, - Join, - Limit, - LiteralRelation, - OrderBy, - Project, - SetOperation, - Source, -} - -/// A table holding an instance of the possible relation types. -table Relation { - impl: RelationImpl (required); -} - -root_type Relation; diff --git a/format/Flight.proto b/format/Flight.proto index 87e5fda796d53..635b1793d2bab 100644 --- a/format/Flight.proto +++ b/format/Flight.proto @@ -19,7 +19,7 @@ syntax = "proto3"; option java_package = "org.apache.arrow.flight.impl"; -option go_package = "github.com/apache/arrow/go/flight;flight"; +option go_package = "github.com/apache/arrow/go/arrow/flight/internal/flight"; option csharp_namespace = "Apache.Arrow.Flight.Protocol"; package arrow.flight.protocol; diff --git a/format/FlightSql.proto b/format/FlightSql.proto index 149f8831e139f..859427b68804b 100644 --- a/format/FlightSql.proto +++ b/format/FlightSql.proto @@ -20,6 +20,7 @@ syntax = "proto3"; import "google/protobuf/descriptor.proto"; option java_package = "org.apache.arrow.flight.sql.impl"; +option go_package = "github.com/apache/arrow/go/arrow/flight/internal/flight"; package arrow.flight.protocol.sql; /* diff --git a/go.work b/go.work new file mode 100644 index 0000000000000..ccfe97d5510a4 --- /dev/null +++ b/go.work @@ -0,0 +1,23 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + +go 1.18 + +use ( + ./go + ./go/arrow/compute +) diff --git a/go/arrow/_examples/helloworld/main.go b/go/arrow/_examples/helloworld/main.go index af56c0eba5bdb..c79f3f0202a6b 100644 --- a/go/arrow/_examples/helloworld/main.go +++ b/go/arrow/_examples/helloworld/main.go @@ -17,9 +17,9 @@ package main import ( - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/math" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/math" + "github.com/apache/arrow/go/v10/arrow/memory" ) func main() { diff --git a/go/arrow/array.go b/go/arrow/array.go index d8983c29f2fc7..52c8998c794f5 100644 --- a/go/arrow/array.go +++ b/go/arrow/array.go @@ -20,7 +20,7 @@ import ( "encoding/json" "fmt" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/memory" ) // ArrayData is the underlying memory and metadata of an Arrow array, corresponding diff --git a/go/arrow/array/array.go b/go/arrow/array/array.go index 47e5a500398cc..7db2a8f004bf6 100644 --- a/go/arrow/array/array.go +++ b/go/arrow/array/array.go @@ -19,9 +19,9 @@ package array import ( "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/internal/debug" ) type arraymarshal interface { @@ -114,10 +114,6 @@ var ( makeArrayFn [64]arrayConstructorFn ) -func unsupportedArrayType(data arrow.ArrayData) arrow.Array { - panic("unsupported data type: " + data.DataType().ID().String()) -} - func invalidDataType(data arrow.ArrayData) arrow.Array { panic("invalid data type: " + data.DataType().ID().String()) } @@ -166,19 +162,19 @@ func init() { arrow.INTERVAL_MONTHS: func(data arrow.ArrayData) arrow.Array { return NewMonthIntervalData(data) }, arrow.INTERVAL_DAY_TIME: func(data arrow.ArrayData) arrow.Array { return NewDayTimeIntervalData(data) }, arrow.DECIMAL128: func(data arrow.ArrayData) arrow.Array { return NewDecimal128Data(data) }, - arrow.DECIMAL256: unsupportedArrayType, + arrow.DECIMAL256: func(data arrow.ArrayData) arrow.Array { return NewDecimal256Data(data) }, arrow.LIST: func(data arrow.ArrayData) arrow.Array { return NewListData(data) }, arrow.STRUCT: func(data arrow.ArrayData) arrow.Array { return NewStructData(data) }, - arrow.SPARSE_UNION: unsupportedArrayType, - arrow.DENSE_UNION: unsupportedArrayType, + arrow.SPARSE_UNION: func(data arrow.ArrayData) arrow.Array { return NewSparseUnionData(data) }, + arrow.DENSE_UNION: func(data arrow.ArrayData) arrow.Array { return NewDenseUnionData(data) }, arrow.DICTIONARY: func(data arrow.ArrayData) arrow.Array { return NewDictionaryData(data) }, arrow.MAP: func(data arrow.ArrayData) arrow.Array { return NewMapData(data) }, arrow.EXTENSION: func(data arrow.ArrayData) arrow.Array { return NewExtensionData(data) }, arrow.FIXED_SIZE_LIST: func(data arrow.ArrayData) arrow.Array { return NewFixedSizeListData(data) }, arrow.DURATION: func(data arrow.ArrayData) arrow.Array { return NewDurationData(data) }, - arrow.LARGE_STRING: unsupportedArrayType, - arrow.LARGE_BINARY: unsupportedArrayType, - arrow.LARGE_LIST: unsupportedArrayType, + arrow.LARGE_STRING: func(data arrow.ArrayData) arrow.Array { return NewLargeStringData(data) }, + arrow.LARGE_BINARY: func(data arrow.ArrayData) arrow.Array { return NewLargeBinaryData(data) }, + 
arrow.LARGE_LIST: func(data arrow.ArrayData) arrow.Array { return NewLargeListData(data) }, arrow.INTERVAL: func(data arrow.ArrayData) arrow.Array { return NewIntervalData(data) }, arrow.INTERVAL_MONTH_DAY_NANO: func(data arrow.ArrayData) arrow.Array { return NewMonthDayNanoIntervalData(data) }, diff --git a/go/arrow/array/array_test.go b/go/arrow/array/array_test.go index 7820218aabce0..f2cee669fa3a8 100644 --- a/go/arrow/array/array_test.go +++ b/go/arrow/array/array_test.go @@ -19,11 +19,11 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/internal/testing/tools" - "github.com/apache/arrow/go/v9/arrow/internal/testing/types" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/internal/testing/tools" + "github.com/apache/arrow/go/v10/arrow/internal/testing/types" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) @@ -36,6 +36,7 @@ func (d *testDataType) Name() string { panic("implement me") } func (d *testDataType) BitWidth() int { return 8 } func (d *testDataType) Fingerprint() string { return "" } func (testDataType) Layout() arrow.DataTypeLayout { return arrow.DataTypeLayout{} } +func (testDataType) String() string { return "" } func TestMakeFromData(t *testing.T) { tests := []struct { @@ -62,6 +63,8 @@ func TestMakeFromData(t *testing.T) { {name: "float64", d: &testDataType{arrow.FLOAT64}}, {name: "string", d: &testDataType{arrow.STRING}, size: 3}, {name: "binary", d: &testDataType{arrow.BINARY}, size: 3}, + {name: "large_string", d: &testDataType{arrow.LARGE_STRING}, size: 3}, + {name: "large_binary", d: &testDataType{arrow.LARGE_BINARY}, size: 3}, {name: "fixed_size_binary", d: &testDataType{arrow.FIXED_SIZE_BINARY}}, {name: "date32", d: &testDataType{arrow.DATE32}}, {name: "date64", d: &testDataType{arrow.DATE64}}, @@ -71,6 +74,7 @@ func TestMakeFromData(t *testing.T) { {name: "month_interval", d: arrow.FixedWidthTypes.MonthInterval}, {name: "day_time_interval", d: arrow.FixedWidthTypes.DayTimeInterval}, {name: "decimal128", d: &testDataType{arrow.DECIMAL128}}, + {name: "decimal256", d: &testDataType{arrow.DECIMAL256}}, {name: "month_day_nano_interval", d: arrow.FixedWidthTypes.MonthDayNanoInterval}, {name: "list", d: &testDataType{arrow.LIST}, child: []arrow.ArrayData{ @@ -78,6 +82,11 @@ func TestMakeFromData(t *testing.T) { array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), }}, + {name: "large list", d: &testDataType{arrow.LARGE_LIST}, child: []arrow.ArrayData{ + array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), + array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), + }}, + {name: "struct", d: &testDataType{arrow.STRUCT}}, {name: "struct", d: &testDataType{arrow.STRUCT}, child: []arrow.ArrayData{ array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), @@ -97,6 +106,9 @@ func TestMakeFromData(t *testing.T) { }, 0 /* nulls */, 0 /* offset */)}, }, + {name: "sparse union", d: 
arrow.SparseUnionOf(nil, nil), child: []arrow.ArrayData{}, size: 2}, + {name: "dense union", d: arrow.DenseUnionOf(nil, nil), child: []arrow.ArrayData{}, size: 3}, + // various dictionary index types and value types {name: "dictionary", d: &testDataType{arrow.DICTIONARY}, expPanic: true, expError: "arrow/array: no dictionary set in Data for Dictionary array"}, {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: &testDataType{arrow.INT64}}, dict: array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, @@ -111,14 +123,6 @@ func TestMakeFromData(t *testing.T) { {name: "extension", d: &testDataType{arrow.EXTENSION}, expPanic: true, expError: "arrow/array: DataType for ExtensionArray must implement arrow.ExtensionType"}, {name: "extension", d: types.NewUUIDType()}, - // unsupported types - {name: "sparse union", d: &testDataType{arrow.SPARSE_UNION}, expPanic: true, expError: "unsupported data type: SPARSE_UNION"}, - {name: "dense union", d: &testDataType{arrow.DENSE_UNION}, expPanic: true, expError: "unsupported data type: DENSE_UNION"}, - {name: "large string", d: &testDataType{arrow.LARGE_STRING}, expPanic: true, expError: "unsupported data type: LARGE_STRING"}, - {name: "large binary", d: &testDataType{arrow.LARGE_BINARY}, expPanic: true, expError: "unsupported data type: LARGE_BINARY"}, - {name: "large list", d: &testDataType{arrow.LARGE_LIST}, expPanic: true, expError: "unsupported data type: LARGE_LIST"}, - {name: "decimal256", d: &testDataType{arrow.DECIMAL256}, expPanic: true, expError: "unsupported data type: DECIMAL256"}, - // invalid types {name: "invalid(-1)", d: &testDataType{arrow.Type(-1)}, expPanic: true, expError: "invalid data type: Type(-1)"}, {name: "invalid(63)", d: &testDataType{arrow.Type(63)}, expPanic: true, expError: "invalid data type: Type(63)"}, diff --git a/go/arrow/array/binary.go b/go/arrow/array/binary.go index b2281f4df98fc..0ce181e9d77c5 100644 --- a/go/arrow/array/binary.go +++ b/go/arrow/array/binary.go @@ -22,10 +22,16 @@ import ( "strings" "unsafe" - "github.com/apache/arrow/go/v9/arrow" + "github.com/apache/arrow/go/v10/arrow" "github.com/goccy/go-json" ) +type BinaryLike interface { + arrow.Array + ValueBytes() []byte + ValueOffset64(int) int64 +} + // A type which represents an immutable sequence of variable-length binary strings. 
type Binary struct { array @@ -64,6 +70,10 @@ func (a *Binary) ValueOffset(i int) int { return int(a.valueOffsets[a.array.data.offset+i]) } +func (a *Binary) ValueOffset64(i int) int64 { + return int64(a.ValueOffset(i)) +} + func (a *Binary) ValueLen(i int) int { if i < 0 || i >= a.array.data.length { panic("arrow/array: index out of range") @@ -160,6 +170,139 @@ func arrayEqualBinary(left, right *Binary) bool { return true } +type LargeBinary struct { + array + valueOffsets []int64 + valueBytes []byte +} + +func NewLargeBinaryData(data arrow.ArrayData) *LargeBinary { + a := &LargeBinary{} + a.refCount = 1 + a.setData(data.(*Data)) + return a +} + +func (a *LargeBinary) Value(i int) []byte { + if i < 0 || i >= a.array.data.length { + panic("arrow/array: index out of range") + } + idx := a.array.data.offset + i + return a.valueBytes[a.valueOffsets[idx]:a.valueOffsets[idx+1]] +} + +func (a *LargeBinary) ValueString(i int) string { + b := a.Value(i) + return *(*string)(unsafe.Pointer(&b)) +} + +func (a *LargeBinary) ValueOffset(i int) int64 { + if i < 0 || i >= a.array.data.length { + panic("arrow/array: index out of range") + } + return a.valueOffsets[a.array.data.offset+i] +} + +func (a *LargeBinary) ValueOffset64(i int) int64 { + return a.ValueOffset(i) +} + +func (a *LargeBinary) ValueLen(i int) int { + if i < 0 || i >= a.array.data.length { + panic("arrow/array: index out of range") + } + beg := a.array.data.offset + i + return int(a.valueOffsets[beg+1] - a.valueOffsets[beg]) +} + +func (a *LargeBinary) ValueOffsets() []int64 { + beg := a.array.data.offset + end := beg + a.array.data.length + 1 + return a.valueOffsets[beg:end] +} + +func (a *LargeBinary) ValueBytes() []byte { + beg := a.array.data.offset + end := beg + a.array.data.length + return a.valueBytes[a.valueOffsets[beg]:a.valueOffsets[end]] +} + +func (a *LargeBinary) String() string { + var o strings.Builder + o.WriteString("[") + for i := 0; i < a.Len(); i++ { + if i > 0 { + o.WriteString(" ") + } + switch { + case a.IsNull(i): + o.WriteString("(null)") + default: + fmt.Fprintf(&o, "%q", a.ValueString(i)) + } + } + o.WriteString("]") + return o.String() +} + +func (a *LargeBinary) setData(data *Data) { + if len(data.buffers) != 3 { + panic("len(data.buffers) != 3") + } + + a.array.setData(data) + + if valueData := data.buffers[2]; valueData != nil { + a.valueBytes = valueData.Bytes() + } + + if valueOffsets := data.buffers[1]; valueOffsets != nil { + a.valueOffsets = arrow.Int64Traits.CastFromBytes(valueOffsets.Bytes()) + } + + if a.array.data.length < 1 { + return + } + + expNumOffsets := a.array.data.offset + a.array.data.length + 1 + if len(a.valueOffsets) < expNumOffsets { + panic(fmt.Errorf("arrow/array: large binary offset buffer must have at least %d values", expNumOffsets)) + } + + if int(a.valueOffsets[expNumOffsets-1]) > len(a.valueBytes) { + panic("arrow/array: large binary offsets out of bounds of data buffer") + } +} + +func (a *LargeBinary) getOneForMarshal(i int) interface{} { + if a.IsNull(i) { + return nil + } + return a.Value(i) +} + +func (a *LargeBinary) MarshalJSON() ([]byte, error) { + vals := make([]interface{}, a.Len()) + for i := 0; i < a.Len(); i++ { + vals[i] = a.getOneForMarshal(i) + } + // golang marshal standard says that []byte will be marshalled + // as a base64-encoded string + return json.Marshal(vals) +} + +func arrayEqualLargeBinary(left, right *LargeBinary) bool { + for i := 0; i < left.Len(); i++ { + if left.IsNull(i) { + continue + } + if !bytes.Equal(left.Value(i), right.Value(i)) { + 
return false + } + } + return true +} + var ( _ arrow.Array = (*Binary)(nil) ) diff --git a/go/arrow/array/binary_test.go b/go/arrow/array/binary_test.go index 776d679acfcd7..53baaf18ee142 100644 --- a/go/arrow/array/binary_test.go +++ b/go/arrow/array/binary_test.go @@ -22,9 +22,9 @@ import ( "github.com/stretchr/testify/assert" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" ) func TestBinary(t *testing.T) { @@ -65,6 +65,48 @@ func TestBinary(t *testing.T) { b.Release() } +func TestLargeBinary(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) + + values := [][]byte{ + []byte("AAA"), + nil, + []byte("BBBB"), + } + valid := []bool{true, false, true} + b.AppendValues(values, valid) + + b.Retain() + b.Release() + + assert.Panics(t, func() { + b.NewBinaryArray() + }) + + a := b.NewLargeBinaryArray() + assert.Equal(t, 3, a.Len()) + assert.Equal(t, 1, a.NullN()) + assert.Equal(t, []byte("AAA"), a.Value(0)) + assert.Equal(t, []byte{}, a.Value(1)) + assert.Equal(t, []byte("BBBB"), a.Value(2)) + a.Release() + + // Test builder reset and NewArray API. + b.AppendValues(values, valid) + a = b.NewArray().(*LargeBinary) + assert.Equal(t, 3, a.Len()) + assert.Equal(t, 1, a.NullN()) + assert.Equal(t, []byte("AAA"), a.Value(0)) + assert.Equal(t, []byte{}, a.Value(1)) + assert.Equal(t, []byte("BBBB"), a.Value(2)) + a.Release() + + b.Release() +} + func TestBinarySliceData(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) @@ -336,6 +378,33 @@ func TestBinaryValueOffset(t *testing.T) { } } +func TestLargeBinaryValueOffset(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} + valids := []bool{true, true, false, false, true, true, true, true, false, true} + + b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) + defer b.Release() + + b.AppendStringValues(values, valids) + + arr := b.NewArray().(*LargeBinary) + defer arr.Release() + + slice := NewSlice(arr, 2, 9).(*LargeBinary) + defer slice.Release() + + offset := 3 + vs := values[2:9] + + for i, v := range vs { + assert.EqualValues(t, offset, slice.ValueOffset(i)) + offset += len(v) + } +} + func TestBinaryValueLen(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) @@ -361,6 +430,31 @@ func TestBinaryValueLen(t *testing.T) { } } +func TestLargeBinaryValueLen(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} + valids := []bool{true, true, false, false, true, true, true, true, false, true} + + b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) + defer b.Release() + + b.AppendStringValues(values, valids) + + arr := b.NewArray().(*LargeBinary) + defer arr.Release() + + slice := NewSlice(arr, 2, 9).(*LargeBinary) + defer slice.Release() + + vs := values[2:9] + + for i, v := range vs { + assert.Equal(t, len(v), slice.ValueLen(i)) + } +} + func TestBinaryValueOffsets(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer 
mem.AssertSize(t, 0) @@ -384,6 +478,29 @@ func TestBinaryValueOffsets(t *testing.T) { assert.Equal(t, []int32{3, 3, 3, 7, 9, 9, 12, 12}, slice.ValueOffsets()) } +func TestLargeBinaryValueOffsets(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} + valids := []bool{true, true, false, false, true, true, true, true, false, true} + + b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) + defer b.Release() + + b.AppendStringValues(values, valids) + + arr := b.NewArray().(*LargeBinary) + defer arr.Release() + + assert.Equal(t, []int64{0, 1, 3, 3, 3, 7, 9, 9, 12, 12, 14}, arr.ValueOffsets()) + + slice := NewSlice(arr, 2, 9).(*LargeBinary) + defer slice.Release() + + assert.Equal(t, []int64{3, 3, 3, 7, 9, 9, 12, 12}, slice.ValueOffsets()) +} + func TestBinaryValueBytes(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) @@ -407,6 +524,29 @@ func TestBinaryValueBytes(t *testing.T) { assert.Equal(t, []byte{'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q'}, slice.ValueBytes()) } +func TestLargeBinaryValueBytes(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} + valids := []bool{true, true, false, false, true, true, true, true, false, true} + + b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) + defer b.Release() + + b.AppendStringValues(values, valids) + + arr := b.NewArray().(*LargeBinary) + defer arr.Release() + + assert.Equal(t, []byte{'a', 'b', 'c', 'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q', 't', 'u'}, arr.ValueBytes()) + + slice := NewSlice(arr, 2, 9).(*LargeBinary) + defer slice.Release() + + assert.Equal(t, []byte{'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q'}, slice.ValueBytes()) +} + func TestBinaryStringer(t *testing.T) { mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) @@ -430,6 +570,29 @@ func TestBinaryStringer(t *testing.T) { } } +func TestLargeBinaryStringer(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + values := []string{"a", "bc", "", "é", "", "hijk", "lm", "", "opq", "", "tu"} + valids := []bool{true, true, false, true, false, true, true, true, true, false, true} + + b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) + defer b.Release() + + b.AppendStringValues(values, valids) + + arr := b.NewArray().(*LargeBinary) + defer arr.Release() + + got := arr.String() + want := `["a" "bc" (null) "é" (null) "hijk" "lm" "" "opq" (null) "tu"]` + + if got != want { + t.Fatalf("invalid stringer:\ngot= %s\nwant=%s\n", got, want) + } +} + func TestBinaryInvalidOffsets(t *testing.T) { const expectedPanic = "arrow/array: binary offsets out of bounds of data buffer" diff --git a/go/arrow/array/binarybuilder.go b/go/arrow/array/binarybuilder.go index 2b45e5104a296..1674b864e166b 100644 --- a/go/arrow/array/binarybuilder.go +++ b/go/arrow/array/binarybuilder.go @@ -24,35 +24,68 @@ import ( "reflect" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/goccy/go-json" ) -const ( - binaryArrayMaximumCapacity = math.MaxInt32 -) 
- // A BinaryBuilder is used to build a Binary array using the Append methods. type BinaryBuilder struct { builder dtype arrow.BinaryDataType - offsets *int32BufferBuilder + offsets bufBuilder values *byteBufferBuilder + + appendOffsetVal func(int) + getOffsetVal func(int) int + maxCapacity uint64 + offsetByteWidth int } +// NewBinaryBuilder can be used for any of the variable length binary types, +// Binary, LargeBinary, String, LargeString by passing the appropriate data type func NewBinaryBuilder(mem memory.Allocator, dtype arrow.BinaryDataType) *BinaryBuilder { + var ( + offsets bufBuilder + offsetValFn func(int) + maxCapacity uint64 + offsetByteWidth int + getOffsetVal func(int) int + ) + switch dtype.Layout().Buffers[1].ByteWidth { + case 4: + b := newInt32BufferBuilder(mem) + offsetValFn = func(v int) { b.AppendValue(int32(v)) } + getOffsetVal = func(i int) int { return int(b.Value(i)) } + offsets = b + maxCapacity = math.MaxInt32 + offsetByteWidth = arrow.Int32SizeBytes + case 8: + b := newInt64BufferBuilder(mem) + offsetValFn = func(v int) { b.AppendValue(int64(v)) } + getOffsetVal = func(i int) int { return int(b.Value(i)) } + offsets = b + maxCapacity = math.MaxInt64 + offsetByteWidth = arrow.Int64SizeBytes + } + b := &BinaryBuilder{ - builder: builder{refCount: 1, mem: mem}, - dtype: dtype, - offsets: newInt32BufferBuilder(mem), - values: newByteBufferBuilder(mem), + builder: builder{refCount: 1, mem: mem}, + dtype: dtype, + offsets: offsets, + values: newByteBufferBuilder(mem), + appendOffsetVal: offsetValFn, + maxCapacity: maxCapacity, + offsetByteWidth: offsetByteWidth, + getOffsetVal: getOffsetVal, } return b } +func (b *BinaryBuilder) Type() arrow.DataType { return b.dtype } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. // Release may be called simultaneously from multiple goroutines. @@ -92,6 +125,12 @@ func (b *BinaryBuilder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *BinaryBuilder) AppendEmptyValue() { + b.Reserve(1) + b.appendNextOffset() + b.UnsafeAppendBoolToBitmap(true) +} + // AppendValues will append the values in the v slice. The valid slice determines which values // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. @@ -135,20 +174,19 @@ func (b *BinaryBuilder) AppendStringValues(v []string, valid []bool) { } func (b *BinaryBuilder) Value(i int) []byte { - offsets := b.offsets.Values() - start := int(offsets[i]) + start := b.getOffsetVal(i) var end int if i == (b.length - 1) { end = b.values.Len() } else { - end = int(offsets[i+1]) + end = b.getOffsetVal(i + 1) } return b.values.Bytes()[start:end] } func (b *BinaryBuilder) init(capacity int) { b.builder.init(capacity) - b.offsets.resize((capacity + 1) * arrow.Int32SizeBytes) + b.offsets.resize((capacity + 1) * b.offsetByteWidth) } // DataLen returns the number of bytes in the data array. @@ -175,7 +213,7 @@ func (b *BinaryBuilder) ReserveData(n int) { // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), // additional memory will be allocated. If n is smaller, the allocated memory may be reduced. 
func (b *BinaryBuilder) Resize(n int) { - b.offsets.resize((n + 1) * arrow.Int32SizeBytes) + b.offsets.resize((n + 1) * b.offsetByteWidth) b.builder.resize(n, b.init) } @@ -185,19 +223,40 @@ func (b *BinaryBuilder) ResizeData(n int) { // NewArray creates a Binary array from the memory buffers used by the builder and resets the BinaryBuilder // so it can be used to build a new array. +// +// Builds the appropriate Binary or LargeBinary array based on the datatype +// it was initialized with. func (b *BinaryBuilder) NewArray() arrow.Array { - return b.NewBinaryArray() + if b.offsetByteWidth == arrow.Int32SizeBytes { + return b.NewBinaryArray() + } + return b.NewLargeBinaryArray() } // NewBinaryArray creates a Binary array from the memory buffers used by the builder and resets the BinaryBuilder // so it can be used to build a new array. func (b *BinaryBuilder) NewBinaryArray() (a *Binary) { + if b.offsetByteWidth != arrow.Int32SizeBytes { + panic("arrow/array: invalid call to NewBinaryArray when building a LargeBinary array") + } + data := b.newData() a = NewBinaryData(data) data.Release() return } +func (b *BinaryBuilder) NewLargeBinaryArray() (a *LargeBinary) { + if b.offsetByteWidth != arrow.Int64SizeBytes { + panic("arrow/array: invalid call to NewLargeBinaryArray when building a Binary array") + } + + data := b.newData() + a = NewLargeBinaryData(data) + data.Release() + return +} + func (b *BinaryBuilder) newData() (data *Data) { b.appendNextOffset() offsets, values := b.offsets.Finish(), b.values.Finish() @@ -217,8 +276,8 @@ func (b *BinaryBuilder) newData() (data *Data) { func (b *BinaryBuilder) appendNextOffset() { numBytes := b.values.Len() - debug.Assert(numBytes <= binaryArrayMaximumCapacity, "exceeded maximum capacity of binary array") - b.offsets.AppendValue(int32(numBytes)) + debug.Assert(uint64(numBytes) <= b.maxCapacity, "exceeded maximum capacity of binary array") + b.appendOffsetVal(numBytes) } func (b *BinaryBuilder) unmarshalOne(dec *json.Decoder) error { diff --git a/go/arrow/array/binarybuilder_test.go b/go/arrow/array/binarybuilder_test.go index 73c5a23aebd2d..1335b75fe65a0 100644 --- a/go/arrow/array/binarybuilder_test.go +++ b/go/arrow/array/binarybuilder_test.go @@ -20,9 +20,9 @@ import ( "bytes" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) @@ -71,7 +71,7 @@ func TestBinaryBuilder_ReserveData(t *testing.T) { // when appending entries until that count. 
ab.ReserveData(256) expCap := ab.DataCap() - for i := 0; i < 256 / 8; i++ { + for i := 0; i < 256/8; i++ { ab.Append(bytes.Repeat([]byte("a"), 8)) } assert.Equal(t, expCap, ab.DataCap(), "unexpected BinaryArrayBuilder.DataCap()") @@ -85,3 +85,63 @@ func TestBinaryBuilder_ReserveData(t *testing.T) { assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewBinaryArray did not reset state") assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewBinaryArray did not reset state") } + +func TestBinaryBuilderLarge(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + ab := array.NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) + + exp := [][]byte{[]byte("foo"), []byte("bar"), nil, []byte("sydney"), []byte("cameron")} + for _, v := range exp { + if v == nil { + ab.AppendNull() + } else { + ab.Append(v) + } + } + + assert.Equal(t, len(exp), ab.Len(), "unexpected Len()") + assert.Equal(t, 1, ab.NullN(), "unexpected NullN()") + + for i, v := range exp { + if v == nil { + v = []byte{} + } + assert.Equal(t, v, ab.Value(i), "unexpected BinaryArrayBuilder.Value(%d)", i) + } + + ar := ab.NewLargeBinaryArray() + ab.Release() + ar.Release() + + // check state of builder after NewBinaryArray + assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewBinaryArray did not reset state") + assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewBinaryArray did not reset state") + assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewBinaryArray did not reset state") +} + +func TestBinaryBuilderLarge_ReserveData(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + ab := array.NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) + + // call ReserveData and ensure the capacity doesn't change + // when appending entries until that count. 
+ ab.ReserveData(256) + expCap := ab.DataCap() + for i := 0; i < 256/8; i++ { + ab.Append(bytes.Repeat([]byte("a"), 8)) + } + assert.Equal(t, expCap, ab.DataCap(), "unexpected BinaryArrayBuilder.DataCap()") + + ar := ab.NewLargeBinaryArray() + ab.Release() + ar.Release() + + // check state of builder after NewBinaryArray + assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewBinaryArray did not reset state") + assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewBinaryArray did not reset state") + assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewBinaryArray did not reset state") +} diff --git a/go/arrow/array/boolean.go b/go/arrow/array/boolean.go index 1ebe57c930e05..2eed49bd65b70 100644 --- a/go/arrow/array/boolean.go +++ b/go/arrow/array/boolean.go @@ -20,9 +20,9 @@ import ( "fmt" "strings" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/goccy/go-json" ) diff --git a/go/arrow/array/boolean_test.go b/go/arrow/array/boolean_test.go index 372d06fc39746..88cd8fc882bbf 100644 --- a/go/arrow/array/boolean_test.go +++ b/go/arrow/array/boolean_test.go @@ -22,8 +22,8 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" ) func TestBooleanSliceData(t *testing.T) { diff --git a/go/arrow/array/booleanbuilder.go b/go/arrow/array/booleanbuilder.go index 6d2905d2d5f07..760d755314a7a 100644 --- a/go/arrow/array/booleanbuilder.go +++ b/go/arrow/array/booleanbuilder.go @@ -23,10 +23,10 @@ import ( "strconv" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/goccy/go-json" ) @@ -41,6 +41,8 @@ func NewBooleanBuilder(mem memory.Allocator) *BooleanBuilder { return &BooleanBuilder{builder: builder{refCount: 1, mem: mem}} } +func (b *BooleanBuilder) Type() arrow.DataType { return arrow.FixedWidthTypes.Boolean } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. // Release may be called simultaneously from multiple goroutines. 
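The binary.go and binarybuilder.go hunks earlier in this diff let a single BinaryBuilder emit either 32-bit or 64-bit offset arrays depending on the datatype it was created with. A minimal usage sketch (not part of the patch), relying only on the API shown above:

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func main() {
	mem := memory.NewGoAllocator()

	// Passing the LargeBinary datatype selects the 64-bit offset path inside BinaryBuilder.
	bldr := array.NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary)
	defer bldr.Release()

	bldr.Append([]byte("hello"))
	bldr.AppendNull()
	bldr.Append([]byte("world"))

	arr := bldr.NewLargeBinaryArray()
	defer arr.Release()

	fmt.Println(arr)                // ["hello" (null) "world"]
	fmt.Println(arr.ValueOffsets()) // [0 5 5 10]
}

Calling NewBinaryArray on this builder would panic per the guard added above; NewArray picks the matching concrete array type automatically.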
@@ -75,6 +77,11 @@ func (b *BooleanBuilder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *BooleanBuilder) AppendEmptyValue() { + b.Reserve(1) + b.UnsafeAppend(false) +} + func (b *BooleanBuilder) UnsafeAppend(v bool) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) if v { @@ -180,6 +187,12 @@ func (b *BooleanBuilder) unmarshalOne(dec *json.Decoder) error { return err } b.Append(val) + case json.Number: + val, err := strconv.ParseBool(v.String()) + if err != nil { + return err + } + b.Append(val) case nil: b.AppendNull() default: @@ -203,6 +216,7 @@ func (b *BooleanBuilder) unmarshal(dec *json.Decoder) error { func (b *BooleanBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) + dec.UseNumber() t, err := dec.Token() if err != nil { return err diff --git a/go/arrow/array/booleanbuilder_test.go b/go/arrow/array/booleanbuilder_test.go index cbbe44ffabdc7..469f9ec9d803b 100644 --- a/go/arrow/array/booleanbuilder_test.go +++ b/go/arrow/array/booleanbuilder_test.go @@ -19,9 +19,9 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/internal/testing/tools" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/internal/testing/tools" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/bufferbuilder.go b/go/arrow/array/bufferbuilder.go index 5901526179e1d..6a91031c22bef 100644 --- a/go/arrow/array/bufferbuilder.go +++ b/go/arrow/array/bufferbuilder.go @@ -19,11 +19,24 @@ package array import ( "sync/atomic" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" ) +type bufBuilder interface { + Retain() + Release() + Len() int + Cap() int + Bytes() []byte + resize(int) + Advance(int) + Append([]byte) + Reset() + Finish() *memory.Buffer +} + // A bufferBuilder provides common functionality for populating memory with a sequence of type-specific values. // Specialized implementations provide type-safe APIs for appending and accessing the memory. 
type bufferBuilder struct { diff --git a/go/arrow/array/bufferbuilder_byte.go b/go/arrow/array/bufferbuilder_byte.go index e10bc301ac896..c34a409aa704c 100644 --- a/go/arrow/array/bufferbuilder_byte.go +++ b/go/arrow/array/bufferbuilder_byte.go @@ -16,7 +16,7 @@ package array -import "github.com/apache/arrow/go/v9/arrow/memory" +import "github.com/apache/arrow/go/v10/arrow/memory" type byteBufferBuilder struct { bufferBuilder diff --git a/go/arrow/array/bufferbuilder_numeric.gen.go b/go/arrow/array/bufferbuilder_numeric.gen.go index f800fa4b31995..1b87e6eeb625d 100644 --- a/go/arrow/array/bufferbuilder_numeric.gen.go +++ b/go/arrow/array/bufferbuilder_numeric.gen.go @@ -19,11 +19,44 @@ package array import ( - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" ) +type int64BufferBuilder struct { + bufferBuilder +} + +func newInt64BufferBuilder(mem memory.Allocator) *int64BufferBuilder { + return &int64BufferBuilder{bufferBuilder: bufferBuilder{refCount: 1, mem: mem}} +} + +// AppendValues appends the contents of v to the buffer, growing the buffer as needed. +func (b *int64BufferBuilder) AppendValues(v []int64) { b.Append(arrow.Int64Traits.CastToBytes(v)) } + +// Values returns a slice of length b.Len(). +// The slice is only valid for use until the next buffer modification. That is, until the next call +// to Advance, Reset, Finish or any Append function. The slice aliases the buffer content at least until the next +// buffer modification. +func (b *int64BufferBuilder) Values() []int64 { return arrow.Int64Traits.CastFromBytes(b.Bytes()) } + +// Value returns the int64 element at the index i. Value will panic if i is negative or ≥ Len. +func (b *int64BufferBuilder) Value(i int) int64 { return b.Values()[i] } + +// Len returns the number of int64 elements in the buffer. +func (b *int64BufferBuilder) Len() int { return b.length / arrow.Int64SizeBytes } + +// AppendValue appends v to the buffer, growing the buffer as needed. +func (b *int64BufferBuilder) AppendValue(v int64) { + if b.capacity < b.length+arrow.Int64SizeBytes { + newCapacity := bitutil.NextPowerOf2(b.length + arrow.Int64SizeBytes) + b.resize(newCapacity) + } + arrow.Int64Traits.PutValue(b.bytes[b.length:], v) + b.length += arrow.Int64SizeBytes +} + type int32BufferBuilder struct { bufferBuilder } @@ -56,3 +89,36 @@ func (b *int32BufferBuilder) AppendValue(v int32) { arrow.Int32Traits.PutValue(b.bytes[b.length:], v) b.length += arrow.Int32SizeBytes } + +type int8BufferBuilder struct { + bufferBuilder +} + +func newInt8BufferBuilder(mem memory.Allocator) *int8BufferBuilder { + return &int8BufferBuilder{bufferBuilder: bufferBuilder{refCount: 1, mem: mem}} +} + +// AppendValues appends the contents of v to the buffer, growing the buffer as needed. +func (b *int8BufferBuilder) AppendValues(v []int8) { b.Append(arrow.Int8Traits.CastToBytes(v)) } + +// Values returns a slice of length b.Len(). +// The slice is only valid for use until the next buffer modification. That is, until the next call +// to Advance, Reset, Finish or any Append function. The slice aliases the buffer content at least until the next +// buffer modification. +func (b *int8BufferBuilder) Values() []int8 { return arrow.Int8Traits.CastFromBytes(b.Bytes()) } + +// Value returns the int8 element at the index i. 
Value will panic if i is negative or ≥ Len. +func (b *int8BufferBuilder) Value(i int) int8 { return b.Values()[i] } + +// Len returns the number of int8 elements in the buffer. +func (b *int8BufferBuilder) Len() int { return b.length / arrow.Int8SizeBytes } + +// AppendValue appends v to the buffer, growing the buffer as needed. +func (b *int8BufferBuilder) AppendValue(v int8) { + if b.capacity < b.length+arrow.Int8SizeBytes { + newCapacity := bitutil.NextPowerOf2(b.length + arrow.Int8SizeBytes) + b.resize(newCapacity) + } + arrow.Int8Traits.PutValue(b.bytes[b.length:], v) + b.length += arrow.Int8SizeBytes +} diff --git a/go/arrow/array/bufferbuilder_numeric.gen.go.tmpl b/go/arrow/array/bufferbuilder_numeric.gen.go.tmpl index 727ae763cc4f6..71d03c0fdf699 100644 --- a/go/arrow/array/bufferbuilder_numeric.gen.go.tmpl +++ b/go/arrow/array/bufferbuilder_numeric.gen.go.tmpl @@ -17,9 +17,9 @@ package array import ( - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" ) {{range .In}} diff --git a/go/arrow/array/bufferbuilder_numeric_test.go b/go/arrow/array/bufferbuilder_numeric_test.go index 9e6f9ba077141..67c8438006700 100644 --- a/go/arrow/array/bufferbuilder_numeric_test.go +++ b/go/arrow/array/bufferbuilder_numeric_test.go @@ -20,8 +20,8 @@ import ( "testing" "unsafe" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/arrow/endian" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/endian" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go index 4e2a21f24e8ce..6a2146c080c40 100644 --- a/go/arrow/array/builder.go +++ b/go/arrow/array/builder.go @@ -20,9 +20,9 @@ import ( "fmt" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/goccy/go-json" ) @@ -35,6 +35,9 @@ type Builder interface { // you can unmarshal a json array to add the values to a builder json.Unmarshaler + // Type returns the datatype that this is building + Type() arrow.DataType + // Retain increases the reference count by 1. // Retain may be called simultaneously from multiple goroutines. Retain() @@ -55,6 +58,9 @@ type Builder interface { // AppendNull adds a new null value to the array being built. AppendNull() + // AppendEmptyValue adds a new zero value of the appropriate type + AppendEmptyValue() + // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. Reserve(n int) @@ -73,6 +79,8 @@ type Builder interface { unmarshalOne(*json.Decoder) error unmarshal(*json.Decoder) error + + newData() *Data } // builder provides common functionality for managing the validity bitmap (nulls) when building arrays. 
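Because the Builder interface now requires Type() and AppendEmptyValue(), generic code can pad any builder without switching on its concrete type. A hypothetical helper (padToLength is illustrative only, and assumes Len() remains part of the Builder interface as in the existing code):

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

// padToLength is a hypothetical helper, not part of this patch: it appends
// zero values until the builder holds n elements, using only the interface
// methods introduced above.
func padToLength(b array.Builder, n int) {
	for b.Len() < n {
		b.AppendEmptyValue()
	}
}

func main() {
	bldr := array.NewBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int64)
	defer bldr.Release()

	padToLength(bldr, 4)
	fmt.Println(bldr.Type(), bldr.Len()) // int64 4
}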
@@ -249,8 +257,12 @@ func NewBuilder(mem memory.Allocator, dtype arrow.DataType) Builder { return NewFloat64Builder(mem) case arrow.STRING: return NewStringBuilder(mem) + case arrow.LARGE_STRING: + return NewLargeStringBuilder(mem) case arrow.BINARY: return NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) + case arrow.LARGE_BINARY: + return NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) case arrow.FIXED_SIZE_BINARY: typ := dtype.(*arrow.FixedSizeBinaryType) return NewFixedSizeBinaryBuilder(mem, typ) @@ -287,20 +299,27 @@ func NewBuilder(mem memory.Allocator, dtype arrow.DataType) Builder { return NewDecimal128Builder(mem, typ) } case arrow.DECIMAL256: + if typ, ok := dtype.(*arrow.Decimal256Type); ok { + return NewDecimal256Builder(mem, typ) + } case arrow.LIST: typ := dtype.(*arrow.ListType) - return NewListBuilder(mem, typ.Elem()) + return NewListBuilderWithField(mem, typ.ElemField()) case arrow.STRUCT: typ := dtype.(*arrow.StructType) return NewStructBuilder(mem, typ) case arrow.SPARSE_UNION: + typ := dtype.(*arrow.SparseUnionType) + return NewSparseUnionBuilder(mem, typ) case arrow.DENSE_UNION: + typ := dtype.(*arrow.DenseUnionType) + return NewDenseUnionBuilder(mem, typ) case arrow.DICTIONARY: typ := dtype.(*arrow.DictionaryType) return NewDictionaryBuilder(mem, typ) - case arrow.LARGE_STRING: - case arrow.LARGE_BINARY: case arrow.LARGE_LIST: + typ := dtype.(*arrow.LargeListType) + return NewLargeListBuilderWithField(mem, typ.ElemField()) case arrow.MAP: typ := dtype.(*arrow.MapType) return NewMapBuilder(mem, typ.KeyType(), typ.ItemType(), typ.KeysSorted) diff --git a/go/arrow/array/builder_test.go b/go/arrow/array/builder_test.go index 3ba2b026ccc3b..efc62f9038dbd 100644 --- a/go/arrow/array/builder_test.go +++ b/go/arrow/array/builder_test.go @@ -19,8 +19,8 @@ package array import ( "testing" - "github.com/apache/arrow/go/v9/arrow/internal/testing/tools" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/internal/testing/tools" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/compare.go b/go/arrow/array/compare.go index b0b149a6c7360..78075cd0f413b 100644 --- a/go/arrow/array/compare.go +++ b/go/arrow/array/compare.go @@ -20,8 +20,8 @@ import ( "fmt" "math" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/float16" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/float16" ) // RecordEqual reports whether the two provided records are equal. 
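With the cases added to NewBuilder above, builders for the large, decimal256, union, and large-list variants can now be constructed directly from a data type. A sketch using only type constructors that appear elsewhere in this diff:

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func main() {
	mem := memory.NewGoAllocator()

	// Each of these datatypes now has a concrete case in NewBuilder.
	for _, dt := range []arrow.DataType{
		arrow.BinaryTypes.LargeString,
		arrow.BinaryTypes.LargeBinary,
		&arrow.Decimal256Type{Precision: 10, Scale: 2},
		arrow.LargeListOf(arrow.PrimitiveTypes.Int8),
	} {
		bldr := array.NewBuilder(mem, dt)
		fmt.Printf("%v -> %T\n", dt, bldr) // e.g. large_binary -> *array.BinaryBuilder
		bldr.Release()
	}
}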
@@ -234,6 +234,12 @@ func Equal(left, right arrow.Array) bool { case *String: r := right.(*String) return arrayEqualString(l, r) + case *LargeBinary: + r := right.(*LargeBinary) + return arrayEqualLargeBinary(l, r) + case *LargeString: + r := right.(*LargeString) + return arrayEqualLargeString(l, r) case *Int8: r := right.(*Int8) return arrayEqualInt8(l, r) @@ -270,6 +276,9 @@ func Equal(left, right arrow.Array) bool { case *Decimal128: r := right.(*Decimal128) return arrayEqualDecimal128(l, r) + case *Decimal256: + r := right.(*Decimal256) + return arrayEqualDecimal256(l, r) case *Date32: r := right.(*Date32) return arrayEqualDate32(l, r) @@ -288,6 +297,9 @@ func Equal(left, right arrow.Array) bool { case *List: r := right.(*List) return arrayEqualList(l, r) + case *LargeList: + r := right.(*LargeList) + return arrayEqualLargeList(l, r) case *FixedSizeList: r := right.(*FixedSizeList) return arrayEqualFixedSizeList(l, r) @@ -315,6 +327,12 @@ func Equal(left, right arrow.Array) bool { case *Dictionary: r := right.(*Dictionary) return arrayEqualDict(l, r) + case *SparseUnion: + r := right.(*SparseUnion) + return arraySparseUnionEqual(l, r) + case *DenseUnion: + r := right.(*DenseUnion) + return arrayDenseUnionEqual(l, r) default: panic(fmt.Errorf("arrow/array: unknown array type %T", l)) } @@ -348,12 +366,17 @@ func ArraySliceApproxEqual(left arrow.Array, lbeg, lend int64, right arrow.Array // SliceApproxEqual reports whether slices left[lbeg:lend] and right[rbeg:rend] are approximately equal. func SliceApproxEqual(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, rend int64, opts ...EqualOption) bool { + opt := newEqualOption(opts...) + return sliceApproxEqual(left, lbeg, lend, right, rbeg, rend, opt) +} + +func sliceApproxEqual(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, rend int64, opt equalOption) bool { l := NewSlice(left, lbeg, lend) defer l.Release() r := NewSlice(right, rbeg, rend) defer r.Release() - return ApproxEqual(l, r, opts...) 
+ return arrayApproxEqual(l, r, opt) } const defaultAbsoluteTolerance = 1e-5 @@ -469,6 +492,12 @@ func arrayApproxEqual(left, right arrow.Array, opt equalOption) bool { case *String: r := right.(*String) return arrayEqualString(l, r) + case *LargeBinary: + r := right.(*LargeBinary) + return arrayEqualLargeBinary(l, r) + case *LargeString: + r := right.(*LargeString) + return arrayEqualLargeString(l, r) case *Int8: r := right.(*Int8) return arrayEqualInt8(l, r) @@ -505,6 +534,9 @@ func arrayApproxEqual(left, right arrow.Array, opt equalOption) bool { case *Decimal128: r := right.(*Decimal128) return arrayEqualDecimal128(l, r) + case *Decimal256: + r := right.(*Decimal256) + return arrayEqualDecimal256(l, r) case *Date32: r := right.(*Date32) return arrayEqualDate32(l, r) @@ -523,6 +555,9 @@ func arrayApproxEqual(left, right arrow.Array, opt equalOption) bool { case *List: r := right.(*List) return arrayApproxEqualList(l, r, opt) + case *LargeList: + r := right.(*LargeList) + return arrayApproxEqualLargeList(l, r, opt) case *FixedSizeList: r := right.(*FixedSizeList) return arrayApproxEqualFixedSizeList(l, r, opt) @@ -550,6 +585,12 @@ func arrayApproxEqual(left, right arrow.Array, opt equalOption) bool { case ExtensionArray: r := right.(ExtensionArray) return arrayApproxEqualExtension(l, r, opt) + case *SparseUnion: + r := right.(*SparseUnion) + return arraySparseUnionApproxEqual(l, r, opt) + case *DenseUnion: + r := right.(*DenseUnion) + return arrayDenseUnionApproxEqual(l, r, opt) default: panic(fmt.Errorf("arrow/array: unknown array type %T", l)) } @@ -638,6 +679,25 @@ func arrayApproxEqualList(left, right *List, opt equalOption) bool { return true } +func arrayApproxEqualLargeList(left, right *LargeList, opt equalOption) bool { + for i := 0; i < left.Len(); i++ { + if left.IsNull(i) { + continue + } + o := func() bool { + l := left.newListValue(i) + defer l.Release() + r := right.newListValue(i) + defer r.Release() + return arrayApproxEqual(l, r, opt) + }() + if !o { + return false + } + } + return true +} + func arrayApproxEqualFixedSizeList(left, right *FixedSizeList, opt equalOption) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { diff --git a/go/arrow/array/compare_test.go b/go/arrow/array/compare_test.go index d34234ec0b67d..62c7e39570fb8 100644 --- a/go/arrow/array/compare_test.go +++ b/go/arrow/array/compare_test.go @@ -21,11 +21,11 @@ import ( "math" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/float16" - "github.com/apache/arrow/go/v9/arrow/internal/arrdata" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/float16" + "github.com/apache/arrow/go/v10/arrow/internal/arrdata" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/concat.go b/go/arrow/array/concat.go index 22228dcd3905a..22885f569ab28 100644 --- a/go/arrow/array/concat.go +++ b/go/arrow/array/concat.go @@ -22,12 +22,12 @@ import ( "math" "math/bits" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/bitutils" - "github.com/apache/arrow/go/v9/internal/utils" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + 
"github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/bitutils" + "github.com/apache/arrow/go/v10/internal/utils" ) // Concatenate creates a new arrow.Array which is the concatenation of the @@ -175,20 +175,7 @@ func concatBuffers(bufs []*memory.Buffer, mem memory.Allocator) *memory.Buffer { return out } -// concatOffsets creates a single offset buffer which represents the concatenation of all of the -// offsets buffers, adjusting the offsets appropriately to their new relative locations. -// -// It also returns the list of ranges that need to be fetched for the corresponding value buffers -// to construct the final concatenated value buffer. -func concatOffsets(buffers []*memory.Buffer, mem memory.Allocator) (*memory.Buffer, []rng, error) { - outLen := 0 - for _, b := range buffers { - outLen += b.Len() / arrow.Int32SizeBytes - } - - out := memory.NewResizableBuffer(mem) - out.Resize(arrow.Int32Traits.BytesRequired(outLen + 1)) - +func handle32BitOffsets(outLen int, buffers []*memory.Buffer, out *memory.Buffer) (*memory.Buffer, []rng, error) { dst := arrow.Int32Traits.CastFromBytes(out.Bytes()) valuesRanges := make([]rng, len(buffers)) nextOffset := int32(0) @@ -309,6 +296,70 @@ func concatDictIndices(mem memory.Allocator, data []arrow.ArrayData, idxType arr return } +func handle64BitOffsets(outLen int, buffers []*memory.Buffer, out *memory.Buffer) (*memory.Buffer, []rng, error) { + dst := arrow.Int64Traits.CastFromBytes(out.Bytes()) + valuesRanges := make([]rng, len(buffers)) + nextOffset := int64(0) + nextElem := int(0) + for i, b := range buffers { + if b.Len() == 0 { + valuesRanges[i].offset = 0 + valuesRanges[i].len = 0 + continue + } + + // when we gather our buffers, we sliced off the last offset from the buffer + // so that we could count the lengths accurately + src := arrow.Int64Traits.CastFromBytes(b.Bytes()) + valuesRanges[i].offset = int(src[0]) + // expand our slice to see that final offset + expand := src[:len(src)+1] + // compute the length of this range by taking the final offset and subtracting where we started. + valuesRanges[i].len = int(expand[len(src)]) - valuesRanges[i].offset + + if nextOffset > math.MaxInt64-int64(valuesRanges[i].len) { + return nil, nil, errors.New("offset overflow while concatenating arrays") + } + + // adjust each offset by the difference between our last ending point and our starting point + adj := nextOffset - src[0] + for j, o := range src { + dst[nextElem+j] = adj + o + } + + // the next index for an element in the output buffer + nextElem += b.Len() / arrow.Int64SizeBytes + // update our offset counter to be the total current length of our output + nextOffset += int64(valuesRanges[i].len) + } + + // final offset should point to the end of the data + dst[outLen] = nextOffset + return out, valuesRanges, nil +} + +// concatOffsets creates a single offset buffer which represents the concatenation of all of the +// offsets buffers, adjusting the offsets appropriately to their new relative locations. +// +// It also returns the list of ranges that need to be fetched for the corresponding value buffers +// to construct the final concatenated value buffer. 
+func concatOffsets(buffers []*memory.Buffer, byteWidth int, mem memory.Allocator) (*memory.Buffer, []rng, error) { + outLen := 0 + for _, b := range buffers { + outLen += b.Len() / byteWidth + } + + out := memory.NewResizableBuffer(mem) + out.Resize(byteWidth * (outLen + 1)) + + switch byteWidth { + case arrow.Int64SizeBytes: + return handle64BitOffsets(outLen, buffers, out) + default: + return handle32BitOffsets(outLen, buffers, out) + } +} + // concat is the implementation for actually performing the concatenation of the arrow.ArrayData // objects that we can call internally for nested types. func concat(data []arrow.ArrayData, mem memory.Allocator) (arrow.ArrayData, error) { @@ -378,14 +429,33 @@ func concat(data []arrow.ArrayData, mem memory.Allocator) (arrow.ArrayData, erro case arrow.FixedWidthDataType: out.buffers[1] = concatBuffers(gatherBuffersFixedWidthType(data, 1, dt), mem) case arrow.BinaryDataType: - offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, arrow.Int32SizeBytes), mem) + offsetWidth := dt.Layout().Buffers[1].ByteWidth + offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, offsetWidth), offsetWidth, mem) if err != nil { return nil, err } out.buffers[2] = concatBuffers(gatherBufferRanges(data, 2, valueRanges), mem) out.buffers[1] = offsetBuffer case *arrow.ListType: - offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, arrow.Int32SizeBytes), mem) + offsetWidth := dt.Layout().Buffers[1].ByteWidth + offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, offsetWidth), offsetWidth, mem) + if err != nil { + return nil, err + } + childData := gatherChildrenRanges(data, 0, valueRanges) + for _, c := range childData { + defer c.Release() + } + + out.buffers[1] = offsetBuffer + out.childData = make([]arrow.ArrayData, 1) + out.childData[0], err = concat(childData, mem) + if err != nil { + return nil, err + } + case *arrow.LargeListType: + offsetWidth := dt.Layout().Buffers[1].ByteWidth + offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, offsetWidth), offsetWidth, mem) if err != nil { return nil, err } @@ -426,7 +496,8 @@ func concat(data []arrow.ArrayData, mem memory.Allocator) (arrow.ArrayData, erro out.childData[i] = childData } case *arrow.MapType: - offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, arrow.Int32SizeBytes), mem) + offsetWidth := dt.Layout().Buffers[1].ByteWidth + offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, offsetWidth), offsetWidth, mem) if err != nil { return nil, err } diff --git a/go/arrow/array/concat_test.go b/go/arrow/array/concat_test.go index 5362fd33f55b0..f3c1c7ea80f28 100644 --- a/go/arrow/array/concat_test.go +++ b/go/arrow/array/concat_test.go @@ -22,11 +22,11 @@ import ( "sort" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/internal/testing/gen" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" @@ -74,7 +74,9 @@ func TestConcatenate(t *testing.T) { {arrow.PrimitiveTypes.Float32}, 
{arrow.PrimitiveTypes.Float64}, {arrow.BinaryTypes.String}, + {arrow.BinaryTypes.LargeString}, {arrow.ListOf(arrow.PrimitiveTypes.Int8)}, + {arrow.LargeListOf(arrow.PrimitiveTypes.Int8)}, {arrow.FixedSizeListOf(3, arrow.PrimitiveTypes.Int8)}, {arrow.StructOf()}, {arrow.MapOf(arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int8)}, @@ -143,6 +145,8 @@ func (cts *ConcatTestSuite) generateArr(size int64, nullprob float64) arrow.Arra return array.NewNull(int(size)) case arrow.STRING: return cts.rng.String(size, 0, 15, nullprob) + case arrow.LARGE_STRING: + return cts.rng.LargeString(size, 0, 15, nullprob) case arrow.LIST: valuesSize := size * 4 values := cts.rng.Int8(valuesSize, 0, 127, nullprob).(*array.Int8) @@ -155,6 +159,32 @@ func (cts *ConcatTestSuite) generateArr(size int64, nullprob float64) arrow.Arra bldr := array.NewListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8) defer bldr.Release() + valid := make([]bool, len(offsetsVector)-1) + for i := range valid { + valid[i] = true + } + bldr.AppendValues(offsetsVector, valid) + vb := bldr.ValueBuilder().(*array.Int8Builder) + for i := 0; i < values.Len(); i++ { + if values.IsValid(i) { + vb.Append(values.Value(i)) + } else { + vb.AppendNull() + } + } + return bldr.NewArray() + case arrow.LARGE_LIST: + valuesSize := size * 8 + values := cts.rng.Int8(valuesSize, 0, 127, nullprob).(*array.Int8) + defer values.Release() + offsetsVector := cts.largeoffsets(int64(valuesSize), int32(size)) + // ensure the first and last offsets encompass the whole values + offsetsVector[0] = 0 + offsetsVector[len(offsetsVector)-1] = int64(valuesSize) + + bldr := array.NewLargeListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8) + defer bldr.Release() + valid := make([]bool, len(offsetsVector)-1) for i := range valid { valid[i] = true @@ -260,6 +290,16 @@ func (cts *ConcatTestSuite) offsets(length, slicecount int32) []int32 { return offsets } +func (cts *ConcatTestSuite) largeoffsets(length int64, slicecount int32) []int64 { + offsets := make([]int64, slicecount+1) + dist := rand.New(rand.NewSource(cts.seed)) + for i := range offsets { + offsets[i] = dist.Int63n(length + 1) + } + sort.Slice(offsets, func(i, j int) bool { return offsets[i] < offsets[j] }) + return offsets +} + func (cts *ConcatTestSuite) TestCheckConcat() { for _, sz := range cts.sizes { cts.Run(fmt.Sprintf("size %d", sz), func() { diff --git a/go/arrow/array/data.go b/go/arrow/array/data.go index b7a1993c1a3b8..c061095969436 100644 --- a/go/arrow/array/data.go +++ b/go/arrow/array/data.go @@ -22,9 +22,9 @@ import ( "sync/atomic" "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" ) // Data represents the memory and metadata of an Arrow array. 
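The concat.go changes parameterize offset handling on the buffer byte width, so Concatenate now covers the 64-bit offset types exercised by the tests above, and the compare.go hunks teach Equal/ApproxEqual about them as well. A small sketch (array.Concatenate itself predates this patch; its signature is assumed from the existing code):

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func main() {
	mem := memory.NewGoAllocator()

	bldr := array.NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary)
	defer bldr.Release()

	bldr.AppendStringValues([]string{"a", "b"}, nil)
	a1 := bldr.NewLargeBinaryArray()
	defer a1.Release()

	bldr.AppendStringValues([]string{"c"}, nil)
	a2 := bldr.NewLargeBinaryArray()
	defer a2.Release()

	// Concatenation of LargeBinary goes through the new 64-bit offset path (handle64BitOffsets).
	out, err := array.Concatenate([]arrow.Array{a1, a2}, mem)
	if err != nil {
		panic(err)
	}
	defer out.Release()

	fmt.Println(out) // ["a" "b" "c"]
}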
@@ -77,6 +77,19 @@ func NewDataWithDictionary(dtype arrow.DataType, length int, buffers []*memory.B return data } +func (d *Data) Copy() *Data { + // don't pass the slices directly, otherwise it retains the connection + // we need to make new slices and populate them with the same pointers + bufs := make([]*memory.Buffer, len(d.buffers)) + copy(bufs, d.buffers) + children := make([]arrow.ArrayData, len(d.childData)) + copy(children, d.childData) + + data := NewData(d.dtype, d.length, bufs, children, d.nulls, d.offset) + data.SetDictionary(d.dictionary) + return data +} + // Reset sets the Data for re-use. func (d *Data) Reset(dtype arrow.DataType, length int, buffers []*memory.Buffer, childData []arrow.ArrayData, nulls, offset int) { // Retain new buffers before releasing existing buffers in-case they're the same ones to prevent accidental premature diff --git a/go/arrow/array/data_test.go b/go/arrow/array/data_test.go index d64d8afb94b5c..7191f1e3457b3 100644 --- a/go/arrow/array/data_test.go +++ b/go/arrow/array/data_test.go @@ -19,8 +19,8 @@ package array import ( "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/decimal128.go b/go/arrow/array/decimal128.go index 0113c5d7fd8ce..2d80a44345438 100644 --- a/go/arrow/array/decimal128.go +++ b/go/arrow/array/decimal128.go @@ -25,11 +25,11 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/decimal128" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/goccy/go-json" ) @@ -126,6 +126,8 @@ func NewDecimal128Builder(mem memory.Allocator, dtype *arrow.Decimal128Type) *De } } +func (b *Decimal128Builder) Type() arrow.DataType { return b.dtype } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. 
func (b *Decimal128Builder) Release() { @@ -160,6 +162,10 @@ func (b *Decimal128Builder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *Decimal128Builder) AppendEmptyValue() { + b.Append(decimal128.Num{}) +} + func (b *Decimal128Builder) UnsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) diff --git a/go/arrow/array/decimal128_test.go b/go/arrow/array/decimal128_test.go index 84144a7abb4bd..123da6c4ab3b3 100644 --- a/go/arrow/array/decimal128_test.go +++ b/go/arrow/array/decimal128_test.go @@ -19,10 +19,10 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/decimal128" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) @@ -77,7 +77,9 @@ func TestNewDecimal128Builder(t *testing.T) { assert.Equal(t, want, a.Values(), "unexpected Decimal128Values") assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity + assert.Equal(t, 4, a.Data().Buffers()[0].Len(), "should be 4 bytes due to minBuilderCapacity") assert.Len(t, a.Values(), 10, "unexpected length of Decimal128Values") + assert.Equal(t, 10*arrow.Decimal128SizeBytes, a.Data().Buffers()[1].Len()) a.Release() ab.Append(decimal128.FromI64(7)) @@ -88,6 +90,7 @@ func TestNewDecimal128Builder(t *testing.T) { assert.Equal(t, 0, a.NullN()) assert.Equal(t, []decimal128.Num{decimal128.FromI64(7), decimal128.FromI64(8)}, a.Values()) assert.Len(t, a.Values(), 2) + assert.Equal(t, 2*arrow.Decimal128SizeBytes, a.Data().Buffers()[1].Len()) a.Release() } diff --git a/go/arrow/array/decimal256.go b/go/arrow/array/decimal256.go new file mode 100644 index 0000000000000..8ad45a6b8d659 --- /dev/null +++ b/go/arrow/array/decimal256.go @@ -0,0 +1,333 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package array + +import ( + "bytes" + "fmt" + "math" + "math/big" + "reflect" + "strings" + "sync/atomic" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/decimal256" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/goccy/go-json" +) + +// Decimal256 is a type that represents an immutable sequence of 256-bit decimal values. 
+type Decimal256 struct { + array + + values []decimal256.Num +} + +func NewDecimal256Data(data arrow.ArrayData) *Decimal256 { + a := &Decimal256{} + a.refCount = 1 + a.setData(data.(*Data)) + return a +} + +func (a *Decimal256) Value(i int) decimal256.Num { return a.values[i] } + +func (a *Decimal256) Values() []decimal256.Num { return a.values } + +func (a *Decimal256) String() string { + o := new(strings.Builder) + o.WriteString("[") + for i := 0; i < a.Len(); i++ { + if i > 0 { + fmt.Fprintf(o, " ") + } + switch { + case a.IsNull(i): + o.WriteString("(null)") + default: + fmt.Fprintf(o, "%v", a.Value(i)) + } + } + o.WriteString("]") + return o.String() +} + +func (a *Decimal256) setData(data *Data) { + a.array.setData(data) + vals := data.buffers[1] + if vals != nil { + a.values = arrow.Decimal256Traits.CastFromBytes(vals.Bytes()) + beg := a.array.data.offset + end := beg + a.array.data.length + a.values = a.values[beg:end] + } +} + +func (a *Decimal256) getOneForMarshal(i int) interface{} { + if a.IsNull(i) { + return nil + } + + typ := a.DataType().(*arrow.Decimal256Type) + f := (&big.Float{}).SetInt(a.Value(i).BigInt()) + f.Quo(f, big.NewFloat(math.Pow10(int(typ.Scale)))) + return f.Text('g', int(typ.Precision)) +} + +func (a *Decimal256) MarshalJSON() ([]byte, error) { + vals := make([]interface{}, a.Len()) + for i := 0; i < a.Len(); i++ { + vals[i] = a.getOneForMarshal(i) + } + return json.Marshal(vals) +} + +func arrayEqualDecimal256(left, right *Decimal256) bool { + for i := 0; i < left.Len(); i++ { + if left.IsNull(i) { + continue + } + if left.Value(i) != right.Value(i) { + return false + } + } + return true +} + +type Decimal256Builder struct { + builder + + dtype *arrow.Decimal256Type + data *memory.Buffer + rawData []decimal256.Num +} + +func NewDecimal256Builder(mem memory.Allocator, dtype *arrow.Decimal256Type) *Decimal256Builder { + return &Decimal256Builder{ + builder: builder{refCount: 1, mem: mem}, + dtype: dtype, + } +} + +// Release decreases the reference count by 1. +// When the reference count goes to zero, the memory is freed. +func (b *Decimal256Builder) Release() { + debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") + + if atomic.AddInt64(&b.refCount, -1) == 0 { + if b.nullBitmap != nil { + b.nullBitmap.Release() + b.nullBitmap = nil + } + if b.data != nil { + b.data.Release() + b.data = nil + b.rawData = nil + } + } +} + +func (b *Decimal256Builder) Append(v decimal256.Num) { + b.Reserve(1) + b.UnsafeAppend(v) +} + +func (b *Decimal256Builder) UnsafeAppend(v decimal256.Num) { + bitutil.SetBit(b.nullBitmap.Bytes(), b.length) + b.rawData[b.length] = v + b.length++ +} + +func (b *Decimal256Builder) AppendNull() { + b.Reserve(1) + b.UnsafeAppendBoolToBitmap(false) +} + +func (b *Decimal256Builder) AppendEmptyValue() { + b.Append(decimal256.Num{}) +} + +func (b *Decimal256Builder) Type() arrow.DataType { return b.dtype } + +func (b *Decimal256Builder) UnsafeAppendBoolToBitmap(isValid bool) { + if isValid { + bitutil.SetBit(b.nullBitmap.Bytes(), b.length) + } else { + b.nulls++ + } + b.length++ +} + +// AppendValues will append the values in the v slice. The valid slice determines which values +// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, +// all values in v are appended and considered valid. 
+func (b *Decimal256Builder) AppendValues(v []decimal256.Num, valid []bool) { + if len(v) != len(valid) && len(valid) != 0 { + panic("arrow/array: len(v) != len(valid) && len(valid) != 0") + } + + if len(v) == 0 { + return + } + + b.Reserve(len(v)) + if len(v) > 0 { + arrow.Decimal256Traits.Copy(b.rawData[b.length:], v) + } + b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) +} + +func (b *Decimal256Builder) init(capacity int) { + b.builder.init(capacity) + + b.data = memory.NewResizableBuffer(b.mem) + bytesN := arrow.Decimal256Traits.BytesRequired(capacity) + b.data.Resize(bytesN) + b.rawData = arrow.Decimal256Traits.CastFromBytes(b.data.Bytes()) +} + +// Reserve ensures there is enough space for appending n elements +// by checking the capacity and calling Resize if necessary. +func (b *Decimal256Builder) Reserve(n int) { + b.builder.reserve(n, b.Resize) +} + +// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), +// additional memory will be allocated. If n is smaller, the allocated memory may reduced. +func (b *Decimal256Builder) Resize(n int) { + nBuilder := n + if n < minBuilderCapacity { + n = minBuilderCapacity + } + + if b.capacity == 0 { + b.init(n) + } else { + b.builder.resize(nBuilder, b.init) + b.data.Resize(arrow.Decimal256Traits.BytesRequired(n)) + b.rawData = arrow.Decimal256Traits.CastFromBytes(b.data.Bytes()) + } +} + +// NewArray creates a Decimal256 array from the memory buffers used by the builder and resets the Decimal256Builder +// so it can be used to build a new array. +func (b *Decimal256Builder) NewArray() arrow.Array { + return b.NewDecimal256Array() +} + +// NewDecimal256Array creates a Decimal256 array from the memory buffers used by the builder and resets the Decimal256Builder +// so it can be used to build a new array. +func (b *Decimal256Builder) NewDecimal256Array() (a *Decimal256) { + data := b.newData() + a = NewDecimal256Data(data) + data.Release() + return +} + +func (b *Decimal256Builder) newData() (data *Data) { + bytesRequired := arrow.Decimal256Traits.BytesRequired(b.length) + if bytesRequired > 0 && bytesRequired < b.data.Len() { + // trim buffers + b.data.Resize(bytesRequired) + } + data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) + b.reset() + + if b.data != nil { + b.data.Release() + b.data = nil + b.rawData = nil + } + + return +} + +func (b *Decimal256Builder) unmarshalOne(dec *json.Decoder) error { + t, err := dec.Token() + if err != nil { + return err + } + + var out *big.Float + + switch v := t.(type) { + case float64: + out = big.NewFloat(v) + case string: + // there's no strong rationale for using ToNearestAway, it's just + // what got me the closest equivalent values with the values + // that I tested with, and there isn't a good way to push + // an option all the way down here to control it. 
+ out, _, err = big.ParseFloat(v, 10, 256, big.ToNearestAway) + if err != nil { + return err + } + case json.Number: + out, _, err = big.ParseFloat(v.String(), 10, 256, big.ToNearestAway) + if err != nil { + return err + } + case nil: + b.AppendNull() + return nil + default: + return &json.UnmarshalTypeError{ + Value: fmt.Sprint(t), + Type: reflect.TypeOf(decimal256.Num{}), + Offset: dec.InputOffset(), + } + } + + val, _ := out.Mul(out, big.NewFloat(math.Pow10(int(b.dtype.Scale)))).Int(nil) + b.Append(decimal256.FromBigInt(val)) + return nil +} + +func (b *Decimal256Builder) unmarshal(dec *json.Decoder) error { + for dec.More() { + if err := b.unmarshalOne(dec); err != nil { + return err + } + } + return nil +} + +// UnmarshalJSON will add the unmarshalled values to this builder. +// +// If the values are strings, they will get parsed with big.ParseFloat using +// a rounding mode of big.ToNearestAway currently. +func (b *Decimal256Builder) UnmarshalJSON(data []byte) error { + dec := json.NewDecoder(bytes.NewReader(data)) + t, err := dec.Token() + if err != nil { + return err + } + + if delim, ok := t.(json.Delim); !ok || delim != '[' { + return fmt.Errorf("arrow/array: decimal256 builder must unpack from json array, found %s", delim) + } + + return b.unmarshal(dec) +} + +var ( + _ arrow.Array = (*Decimal256)(nil) + _ Builder = (*Decimal256Builder)(nil) +) diff --git a/go/arrow/array/decimal256_test.go b/go/arrow/array/decimal256_test.go new file mode 100644 index 0000000000000..6f44fd3e01eb0 --- /dev/null +++ b/go/arrow/array/decimal256_test.go @@ -0,0 +1,183 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
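The new Decimal256 array and builder mirror the Decimal128 pair: values are stored as raw decimal256.Num and only interpreted through the type's precision and scale when marshalling. A short sketch of the builder API added above (the expected JSON output is an assumption based on getOneForMarshal):

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/decimal256"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func main() {
	mem := memory.NewGoAllocator()

	bldr := array.NewDecimal256Builder(mem, &arrow.Decimal256Type{Precision: 10, Scale: 2})
	defer bldr.Release()

	bldr.Append(decimal256.FromI64(12345)) // interpreted as 123.45 at scale 2
	bldr.AppendNull()

	arr := bldr.NewDecimal256Array()
	defer arr.Release()

	js, err := arr.MarshalJSON()
	if err != nil {
		panic(err)
	}
	fmt.Println(string(js)) // expected: ["123.45",null]
}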
+ +package array_test + +import ( + "testing" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/decimal256" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/stretchr/testify/assert" +) + +func TestNewDecimal256Builder(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + ab := array.NewDecimal256Builder(mem, &arrow.Decimal256Type{Precision: 10, Scale: 1}) + defer ab.Release() + + ab.Retain() + ab.Release() + + want := []decimal256.Num{ + decimal256.New(1, 1, 1, 1), + decimal256.New(2, 2, 2, 2), + decimal256.New(3, 3, 3, 3), + {}, + decimal256.FromI64(-5), + decimal256.FromI64(-6), + {}, + decimal256.FromI64(8), + decimal256.FromI64(9), + decimal256.FromI64(10), + } + valids := []bool{true, true, true, false, true, true, false, true, true, true} + + for i, valid := range valids { + switch { + case valid: + ab.Append(want[i]) + default: + ab.AppendNull() + } + } + + // check state of builder before NewDecimal256Array + assert.Equal(t, 10, ab.Len(), "unexpected Len()") + assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") + + a := ab.NewArray().(*array.Decimal256) + a.Retain() + a.Release() + + // check state of builder after NewDecimal256Array + assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewDecimal256Array did not reset state") + assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewDecimal256Array did not reset state") + assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewDecimal256Array did not reset state") + + // check state of array + assert.Equal(t, 2, a.NullN(), "unexpected null count") + + assert.Equal(t, want, a.Values(), "unexpected Decimal256Values") + assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity + assert.Equal(t, 4, a.Data().Buffers()[0].Len(), "should be 4 bytes due to minBuilderCapacity") + assert.Len(t, a.Values(), 10, "unexpected length of Decimal256Values") + assert.Equal(t, 10*arrow.Decimal256SizeBytes, a.Data().Buffers()[1].Len()) + + a.Release() + ab.Append(decimal256.FromI64(7)) + ab.Append(decimal256.FromI64(8)) + + a = ab.NewDecimal256Array() + + assert.Equal(t, 0, a.NullN()) + assert.Equal(t, 4, a.Data().Buffers()[0].Len(), "should be 4 bytes due to minBuilderCapacity") + assert.Equal(t, []decimal256.Num{decimal256.FromI64(7), decimal256.FromI64(8)}, a.Values()) + assert.Len(t, a.Values(), 2) + assert.Equal(t, 2*arrow.Decimal256SizeBytes, a.Data().Buffers()[1].Len()) + + a.Release() +} + +func TestDecimal256Builder_Empty(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + ab := array.NewDecimal256Builder(mem, &arrow.Decimal256Type{Precision: 10, Scale: 1}) + defer ab.Release() + + want := []decimal256.Num{decimal256.FromI64(3), decimal256.FromI64(4)} + + ab.AppendValues([]decimal256.Num{}, nil) + a := ab.NewDecimal256Array() + assert.Zero(t, a.Len()) + a.Release() + + ab.AppendValues(nil, nil) + a = ab.NewDecimal256Array() + assert.Zero(t, a.Len()) + a.Release() + + ab.AppendValues(want, nil) + a = ab.NewDecimal256Array() + assert.Equal(t, want, a.Values()) + a.Release() + + ab.AppendValues([]decimal256.Num{}, nil) + ab.AppendValues(want, nil) + a = ab.NewDecimal256Array() + assert.Equal(t, want, a.Values()) + a.Release() + + ab.AppendValues(want, nil) + ab.AppendValues([]decimal256.Num{}, nil) + a = ab.NewDecimal256Array() + assert.Equal(t, want, a.Values()) + a.Release() +} + +func 
TestDecimal256Slice(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + dtype := &arrow.Decimal256Type{Precision: 10, Scale: 1} + b := array.NewDecimal256Builder(mem, dtype) + defer b.Release() + + var data = []decimal256.Num{ + decimal256.FromI64(-1), + decimal256.FromI64(+0), + decimal256.FromI64(+1), + decimal256.New(4, 4, 4, 4), + } + b.AppendValues(data[:2], nil) + b.AppendNull() + b.Append(data[3]) + + arr := b.NewDecimal256Array() + defer arr.Release() + + if got, want := arr.Len(), len(data); got != want { + t.Fatalf("invalid array length: got=%d, want=%d", got, want) + } + + slice := array.NewSliceData(arr.Data(), 2, 4) + defer slice.Release() + + sub1 := array.MakeFromData(slice) + defer sub1.Release() + + v, ok := sub1.(*array.Decimal256) + if !ok { + t.Fatalf("could not type-assert to array.String") + } + + if got, want := v.String(), `[(null) {[4 4 4 4]}]`; got != want { + t.Fatalf("got=%q, want=%q", got, want) + } + + if got, want := v.NullN(), 1; got != want { + t.Fatalf("got=%q, want=%q", got, want) + } + + if got, want := v.Data().Offset(), 2; got != want { + t.Fatalf("invalid offset: got=%d, want=%d", got, want) + } +} diff --git a/go/arrow/array/decimal_test.go b/go/arrow/array/decimal_test.go new file mode 100644 index 0000000000000..65980d25f333a --- /dev/null +++ b/go/arrow/array/decimal_test.go @@ -0,0 +1,222 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package array_test + +import ( + "fmt" + "math/big" + "testing" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/decimal256" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/stretchr/testify/suite" +) + +type decimalValue interface{} + +func bitmapFromSlice(vals []bool) []byte { + out := make([]byte, int(bitutil.BytesForBits(int64(len(vals))))) + writer := bitutil.NewBitmapWriter(out, 0, len(vals)) + for _, val := range vals { + if val { + writer.Set() + } else { + writer.Clear() + } + writer.Next() + } + writer.Finish() + return out +} + +type DecimalTestSuite struct { + suite.Suite + + dt arrow.DataType + mem *memory.CheckedAllocator +} + +func (d *DecimalTestSuite) SetupTest() { + d.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) +} + +func (d *DecimalTestSuite) TearDownTest() { + d.mem.AssertSize(d.T(), 0) +} + +func (d *DecimalTestSuite) makeData(input []decimalValue, out []byte) { + switch d.dt.ID() { + case arrow.DECIMAL128: + for _, v := range input { + arrow.Decimal128Traits.PutValue(out, v.(decimal128.Num)) + out = out[arrow.Decimal128SizeBytes:] + } + case arrow.DECIMAL256: + for _, v := range input { + arrow.Decimal256Traits.PutValue(out, v.(decimal256.Num)) + out = out[arrow.Decimal256SizeBytes:] + } + } +} + +func (d *DecimalTestSuite) testCreate(bitWidth int, prec int32, draw []decimalValue, valids []bool, offset int64) arrow.Array { + switch bitWidth { + case 128: + d.dt = &arrow.Decimal128Type{Precision: prec, Scale: 4} + case 256: + d.dt = &arrow.Decimal256Type{Precision: prec, Scale: 4} + } + + bldr := array.NewBuilder(d.mem, d.dt) + defer bldr.Release() + bldr.Reserve(len(draw)) + + nullCount := 0 + for i, b := range valids { + if b { + switch v := draw[i].(type) { + case decimal128.Num: + bldr.(*array.Decimal128Builder).Append(v) + case decimal256.Num: + bldr.(*array.Decimal256Builder).Append(v) + } + } else { + bldr.AppendNull() + nullCount++ + } + } + + arr := bldr.NewArray() + d.EqualValues(0, bldr.Len()) + + rawBytes := make([]byte, len(draw)*(d.dt.(arrow.FixedWidthDataType).BitWidth()/8)) + d.makeData(draw, rawBytes) + + expectedData := memory.NewBufferBytes(rawBytes) + expectedNullBitmap := bitmapFromSlice(valids) + expectedNullCount := len(draw) - bitutil.CountSetBits(expectedNullBitmap, 0, len(valids)) + + expected := array.NewData(d.dt, len(valids), []*memory.Buffer{memory.NewBufferBytes(expectedNullBitmap), expectedData}, nil, expectedNullCount, 0) + defer expected.Release() + + expectedArr := array.MakeFromData(expected) + defer expectedArr.Release() + + lhs := array.NewSlice(arr, offset, int64(arr.Len())-offset) + rhs := array.NewSlice(expectedArr, offset, int64(expectedArr.Len())-offset) + defer func() { + lhs.Release() + rhs.Release() + }() + + d.Truef(array.Equal(lhs, rhs), "expected: %s, got: %s\n", rhs, lhs) + return arr +} + +type Decimal128TestSuite struct { + DecimalTestSuite +} + +func (d *Decimal128TestSuite) runTest(f func(prec int32)) { + for prec := int32(1); prec <= 38; prec++ { + d.Run(fmt.Sprintf("prec=%d", prec), func() { f(prec) }) + } +} + +func (d *Decimal128TestSuite) TestNoNulls() { + d.runTest(func(prec int32) { + draw := []decimalValue{decimal128.FromU64(1), decimal128.FromI64(-2), + decimal128.FromU64(2389), decimal128.FromU64(4), + decimal128.FromI64(-12348)} + valids := []bool{true, true, true, true, true} + arr := 
d.testCreate(128, prec, draw, valids, 0) + arr.Release() + arr = d.testCreate(128, prec, draw, valids, 2) + arr.Release() + }) +} + +func (d *Decimal128TestSuite) TestWithNulls() { + d.runTest(func(prec int32) { + draw := []decimalValue{decimal128.FromU64(1), decimal128.FromU64(2), + decimal128.FromI64(-1), decimal128.FromI64(4), decimal128.FromI64(-1), + decimal128.FromI64(1), decimal128.FromI64(2)} + bigVal, _ := (&big.Int{}).SetString("230342903942234234", 10) + draw = append(draw, decimal128.FromBigInt(bigVal)) + + bigNeg, _ := (&big.Int{}).SetString("-23049302932235234", 10) + draw = append(draw, decimal128.FromBigInt(bigNeg)) + + valids := []bool{true, true, false, true, false, true, true, true, true} + arr := d.testCreate(128, prec, draw, valids, 0) + arr.Release() + arr = d.testCreate(128, prec, draw, valids, 2) + arr.Release() + }) +} + +type Decimal256TestSuite struct { + DecimalTestSuite +} + +func (d *Decimal256TestSuite) runTest(f func(prec int32)) { + for _, prec := range []int32{1, 2, 5, 10, 38, 39, 40, 75, 76} { + d.Run(fmt.Sprintf("prec=%d", prec), func() { f(prec) }) + } +} + +func (d *Decimal256TestSuite) TestNoNulls() { + d.runTest(func(prec int32) { + draw := []decimalValue{decimal256.FromU64(1), decimal256.FromI64(-2), + decimal256.FromU64(2389), decimal256.FromU64(4), + decimal256.FromI64(-12348)} + valids := []bool{true, true, true, true, true} + arr := d.testCreate(256, prec, draw, valids, 0) + arr.Release() + arr = d.testCreate(256, prec, draw, valids, 2) + arr.Release() + }) +} + +func (d *Decimal256TestSuite) TestWithNulls() { + d.runTest(func(prec int32) { + draw := []decimalValue{decimal256.FromU64(1), decimal256.FromU64(2), + decimal256.FromI64(-1), decimal256.FromI64(4), decimal256.FromI64(-1), + decimal256.FromI64(1), decimal256.FromI64(2)} + + // (pow(2, 255) - 1) + bigVal, _ := (&big.Int{}).SetString("57896044618658097711785492504343953926634992332820282019728792003956564819967", 10) + draw = append(draw, decimal256.FromBigInt(bigVal)) + + draw = append(draw, decimal256.FromBigInt(bigVal.Neg(bigVal))) + + valids := []bool{true, true, false, true, false, true, true, true, true} + arr := d.testCreate(256, prec, draw, valids, 0) + arr.Release() + arr = d.testCreate(256, prec, draw, valids, 2) + arr.Release() + }) +} + +func TestDecimal(t *testing.T) { + suite.Run(t, new(Decimal128TestSuite)) + suite.Run(t, new(Decimal256TestSuite)) +} diff --git a/go/arrow/array/dictionary.go b/go/arrow/array/dictionary.go index 7894e873bb2c5..2409e296ca983 100644 --- a/go/arrow/array/dictionary.go +++ b/go/arrow/array/dictionary.go @@ -25,14 +25,14 @@ import ( "sync/atomic" "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/decimal128" - "github.com/apache/arrow/go/v9/arrow/float16" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/hashing" - "github.com/apache/arrow/go/v9/internal/utils" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/float16" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/hashing" + "github.com/apache/arrow/go/v10/internal/utils" "github.com/goccy/go-json" ) @@ -382,7 +382,7 @@ func createMemoTable(mem memory.Allocator, dt arrow.DataType) (ret 
hashing.MemoT ret = hashing.NewFloat32MemoTable(0) case arrow.FLOAT64: ret = hashing.NewFloat64MemoTable(0) - case arrow.BINARY, arrow.FIXED_SIZE_BINARY, arrow.DECIMAL128, arrow.INTERVAL_DAY_TIME, arrow.INTERVAL_MONTH_DAY_NANO: + case arrow.BINARY, arrow.FIXED_SIZE_BINARY, arrow.DECIMAL128, arrow.DECIMAL256, arrow.INTERVAL_DAY_TIME, arrow.INTERVAL_MONTH_DAY_NANO: ret = hashing.NewBinaryMemoTable(0, 0, NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)) case arrow.STRING: ret = hashing.NewBinaryMemoTable(0, 0, NewBinaryBuilder(mem, arrow.BinaryTypes.String)) @@ -620,6 +620,13 @@ func NewDictionaryBuilderWithDict(mem memory.Allocator, dt *arrow.DictionaryType } return ret case arrow.DECIMAL256: + ret := &Decimal256DictionaryBuilder{bldr} + if init != nil { + if err = ret.InsertDictValues(init.(*Decimal256)); err != nil { + panic(err) + } + } + return ret case arrow.LIST: case arrow.STRUCT: case arrow.SPARSE_UNION: @@ -656,6 +663,8 @@ func NewDictionaryBuilder(mem memory.Allocator, dt *arrow.DictionaryType) Dictio return NewDictionaryBuilderWithDict(mem, dt, nil) } +func (b *dictionaryBuilder) Type() arrow.DataType { return b.dt } + func (b *dictionaryBuilder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") @@ -675,6 +684,11 @@ func (b *dictionaryBuilder) AppendNull() { b.idxBuilder.AppendNull() } +func (b *dictionaryBuilder) AppendEmptyValue() { + b.length += 1 + b.idxBuilder.AppendEmptyValue() +} + func (b *dictionaryBuilder) Reserve(n int) { b.idxBuilder.Reserve(n) } @@ -728,19 +742,24 @@ func (b *dictionaryBuilder) NewArray() arrow.Array { return b.NewDictionaryArray() } -func (b *dictionaryBuilder) NewDictionaryArray() *Dictionary { - a := &Dictionary{} - a.refCount = 1 - +func (b *dictionaryBuilder) newData() *Data { indices, dict, err := b.newWithDictOffset(0) if err != nil { panic(err) } - defer indices.Release() indices.dtype = b.dt indices.dictionary = dict + return indices +} + +func (b *dictionaryBuilder) NewDictionaryArray() *Dictionary { + a := &Dictionary{} + a.refCount = 1 + + indices := b.newData() a.setData(indices) + indices.Release() return a } @@ -1230,6 +1249,24 @@ func (b *Decimal128DictionaryBuilder) InsertDictValues(arr *Decimal128) (err err return } +type Decimal256DictionaryBuilder struct { + dictionaryBuilder +} + +func (b *Decimal256DictionaryBuilder) Append(v decimal128.Num) error { + return b.appendValue((*(*[arrow.Decimal256SizeBytes]byte)(unsafe.Pointer(&v)))[:]) +} +func (b *Decimal256DictionaryBuilder) InsertDictValues(arr *Decimal256) (err error) { + data := arrow.Decimal256Traits.CastToBytes(arr.values) + for len(data) > 0 { + if err = b.insertDictValue(data[:arrow.Decimal256SizeBytes]); err != nil { + break + } + data = data[arrow.Decimal256SizeBytes:] + } + return +} + type MonthDayNanoDictionaryBuilder struct { dictionaryBuilder } diff --git a/go/arrow/array/dictionary_test.go b/go/arrow/array/dictionary_test.go index 376d6cbb7caeb..580c57a371aca 100644 --- a/go/arrow/array/dictionary_test.go +++ b/go/arrow/array/dictionary_test.go @@ -23,12 +23,12 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/decimal128" - "github.com/apache/arrow/go/v9/arrow/internal/testing/types" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + 
"github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/internal/testing/types" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" diff --git a/go/arrow/array/extension.go b/go/arrow/array/extension.go index 3fb3e77b85807..6de83fa3a6e56 100644 --- a/go/arrow/array/extension.go +++ b/go/arrow/array/extension.go @@ -20,8 +20,8 @@ import ( "fmt" "reflect" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/goccy/go-json" ) @@ -228,6 +228,8 @@ func NewExtensionBuilder(mem memory.Allocator, dt arrow.ExtensionType) *Extensio return &ExtensionBuilder{Builder: NewBuilder(mem, dt.StorageType()), dt: dt} } +func (b *ExtensionBuilder) Type() arrow.DataType { return b.dt } + // StorageBuilder returns the builder for the underlying storage type. func (b *ExtensionBuilder) StorageBuilder() Builder { return b.Builder } diff --git a/go/arrow/array/extension_test.go b/go/arrow/array/extension_test.go index f15bff6e81b08..e2d4cf969afbc 100644 --- a/go/arrow/array/extension_test.go +++ b/go/arrow/array/extension_test.go @@ -19,10 +19,10 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/internal/testing/types" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/internal/testing/types" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/suite" ) diff --git a/go/arrow/array/fixed_size_list.go b/go/arrow/array/fixed_size_list.go index f435068720499..c13f596663f95 100644 --- a/go/arrow/array/fixed_size_list.go +++ b/go/arrow/array/fixed_size_list.go @@ -22,10 +22,10 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/goccy/go-json" ) @@ -169,6 +169,8 @@ func NewFixedSizeListBuilder(mem memory.Allocator, n int32, etype arrow.DataType } } +func (b *FixedSizeListBuilder) Type() arrow.DataType { return arrow.FixedSizeListOf(b.n, b.etype) } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. 
func (b *FixedSizeListBuilder) Release() { @@ -196,6 +198,13 @@ func (b *FixedSizeListBuilder) AppendNull() { b.unsafeAppendBoolToBitmap(false) } +func (b *FixedSizeListBuilder) AppendEmptyValue() { + b.Append(true) + for i := int32(0); i < b.n; i++ { + b.values.AppendEmptyValue() + } +} + func (b *FixedSizeListBuilder) AppendValues(valid []bool) { b.Reserve(len(valid)) b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) diff --git a/go/arrow/array/fixed_size_list_test.go b/go/arrow/array/fixed_size_list_test.go index e349627d2cff2..ab09b965c7d2f 100644 --- a/go/arrow/array/fixed_size_list_test.go +++ b/go/arrow/array/fixed_size_list_test.go @@ -20,9 +20,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" ) func TestFixedSizeListArray(t *testing.T) { diff --git a/go/arrow/array/fixedsize_binary.go b/go/arrow/array/fixedsize_binary.go index 4f27becbb21a8..24f40363e9b40 100644 --- a/go/arrow/array/fixedsize_binary.go +++ b/go/arrow/array/fixedsize_binary.go @@ -21,7 +21,7 @@ import ( "fmt" "strings" - "github.com/apache/arrow/go/v9/arrow" + "github.com/apache/arrow/go/v10/arrow" "github.com/goccy/go-json" ) diff --git a/go/arrow/array/fixedsize_binary_test.go b/go/arrow/array/fixedsize_binary_test.go index 168d61d39f27e..ec8f2f356378f 100644 --- a/go/arrow/array/fixedsize_binary_test.go +++ b/go/arrow/array/fixedsize_binary_test.go @@ -21,9 +21,9 @@ import ( "github.com/stretchr/testify/assert" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" ) func TestFixedSizeBinary(t *testing.T) { diff --git a/go/arrow/array/fixedsize_binarybuilder.go b/go/arrow/array/fixedsize_binarybuilder.go index ee5e2d2b3e7ee..7a600c13f5b89 100644 --- a/go/arrow/array/fixedsize_binarybuilder.go +++ b/go/arrow/array/fixedsize_binarybuilder.go @@ -23,9 +23,9 @@ import ( "reflect" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/goccy/go-json" ) @@ -46,6 +46,8 @@ func NewFixedSizeBinaryBuilder(mem memory.Allocator, dtype *arrow.FixedSizeBinar return b } +func (b *FixedSizeBinaryBuilder) Type() arrow.DataType { return b.dtype } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. // Release may be called simultaneously from multiple goroutines. @@ -81,6 +83,12 @@ func (b *FixedSizeBinaryBuilder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *FixedSizeBinaryBuilder) AppendEmptyValue() { + b.Reserve(1) + b.values.Advance(b.dtype.ByteWidth) + b.UnsafeAppendBoolToBitmap(true) +} + // AppendValues will append the values in the v slice. The valid slice determines which values // in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, // all values in v are appended and considered valid. 
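The AppendEmptyValue additions in this and the surrounding hunks give each builder a way to append a valid zero value, in contrast to AppendNull, which clears the validity bit. A minimal sketch (not part of the patch, byte values chosen for illustration) using the fixed-size binary builder:

```go
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func main() {
	mem := memory.NewGoAllocator()

	fsb := array.NewFixedSizeBinaryBuilder(mem, &arrow.FixedSizeBinaryType{ByteWidth: 4})
	defer fsb.Release()

	fsb.Append([]byte{0xde, 0xad, 0xbe, 0xef})
	fsb.AppendNull()       // slot 1: null, validity bit cleared
	fsb.AppendEmptyValue() // slot 2: valid, holds four zero bytes

	arr := fsb.NewFixedSizeBinaryArray()
	defer arr.Release()

	fmt.Println(arr.IsNull(1), arr.IsNull(2)) // true false
}
```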
diff --git a/go/arrow/array/fixedsize_binarybuilder_test.go b/go/arrow/array/fixedsize_binarybuilder_test.go index 8c8dc5a313c6a..5564f34f4afdd 100644 --- a/go/arrow/array/fixedsize_binarybuilder_test.go +++ b/go/arrow/array/fixedsize_binarybuilder_test.go @@ -19,8 +19,8 @@ package array import ( "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/float16.go b/go/arrow/array/float16.go index 914f86da252f6..9e63a7e76a904 100644 --- a/go/arrow/array/float16.go +++ b/go/arrow/array/float16.go @@ -20,8 +20,8 @@ import ( "fmt" "strings" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/float16" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/float16" "github.com/goccy/go-json" ) diff --git a/go/arrow/array/float16_builder.go b/go/arrow/array/float16_builder.go index 2f49756aa5932..4947981debb03 100644 --- a/go/arrow/array/float16_builder.go +++ b/go/arrow/array/float16_builder.go @@ -23,11 +23,11 @@ import ( "strconv" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/float16" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/float16" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/goccy/go-json" ) @@ -42,6 +42,8 @@ func NewFloat16Builder(mem memory.Allocator) *Float16Builder { return &Float16Builder{builder: builder{refCount: 1, mem: mem}} } +func (b *Float16Builder) Type() arrow.DataType { return arrow.FixedWidthTypes.Float16 } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. 
func (b *Float16Builder) Release() { @@ -76,6 +78,11 @@ func (b *Float16Builder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *Float16Builder) AppendEmptyValue() { + b.Reserve(1) + b.UnsafeAppend(float16.Num{}) +} + func (b *Float16Builder) UnsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) diff --git a/go/arrow/array/float16_builder_test.go b/go/arrow/array/float16_builder_test.go index 501a777cce584..e42a5119e84bf 100644 --- a/go/arrow/array/float16_builder_test.go +++ b/go/arrow/array/float16_builder_test.go @@ -19,9 +19,9 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/float16" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/float16" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/interval.go b/go/arrow/array/interval.go index 6953175a00839..4a51195aed468 100644 --- a/go/arrow/array/interval.go +++ b/go/arrow/array/interval.go @@ -22,10 +22,10 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/goccy/go-json" ) @@ -136,6 +136,8 @@ func NewMonthIntervalBuilder(mem memory.Allocator) *MonthIntervalBuilder { return &MonthIntervalBuilder{builder: builder{refCount: 1, mem: mem}} } +func (b *MonthIntervalBuilder) Type() arrow.DataType { return arrow.FixedWidthTypes.MonthInterval } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *MonthIntervalBuilder) Release() { @@ -164,6 +166,10 @@ func (b *MonthIntervalBuilder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *MonthIntervalBuilder) AppendEmptyValue() { + b.Append(arrow.MonthInterval(0)) +} + func (b *MonthIntervalBuilder) UnsafeAppend(v arrow.MonthInterval) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v @@ -393,6 +399,8 @@ func NewDayTimeIntervalBuilder(mem memory.Allocator) *DayTimeIntervalBuilder { return &DayTimeIntervalBuilder{builder: builder{refCount: 1, mem: mem}} } +func (b *DayTimeIntervalBuilder) Type() arrow.DataType { return arrow.FixedWidthTypes.DayTimeInterval } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *DayTimeIntervalBuilder) Release() { @@ -421,6 +429,10 @@ func (b *DayTimeIntervalBuilder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *DayTimeIntervalBuilder) AppendEmptyValue() { + b.Append(arrow.DayTimeInterval{}) +} + func (b *DayTimeIntervalBuilder) UnsafeAppend(v arrow.DayTimeInterval) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v @@ -651,6 +663,10 @@ func NewMonthDayNanoIntervalBuilder(mem memory.Allocator) *MonthDayNanoIntervalB return &MonthDayNanoIntervalBuilder{builder: builder{refCount: 1, mem: mem}} } +func (b *MonthDayNanoIntervalBuilder) Type() arrow.DataType { + return arrow.FixedWidthTypes.MonthDayNanoInterval +} + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. 
func (b *MonthDayNanoIntervalBuilder) Release() { @@ -679,6 +695,10 @@ func (b *MonthDayNanoIntervalBuilder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *MonthDayNanoIntervalBuilder) AppendEmptyValue() { + b.Append(arrow.MonthDayNanoInterval{}) +} + func (b *MonthDayNanoIntervalBuilder) UnsafeAppend(v arrow.MonthDayNanoInterval) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v diff --git a/go/arrow/array/interval_test.go b/go/arrow/array/interval_test.go index f5d91e8f77f8e..17a49d94d453d 100644 --- a/go/arrow/array/interval_test.go +++ b/go/arrow/array/interval_test.go @@ -20,9 +20,9 @@ import ( "math" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/json_reader.go b/go/arrow/array/json_reader.go index 4db1b85213b17..0056a34a44bbe 100644 --- a/go/arrow/array/json_reader.go +++ b/go/arrow/array/json_reader.go @@ -22,9 +22,9 @@ import ( "io" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/goccy/go-json" ) diff --git a/go/arrow/array/json_reader_test.go b/go/arrow/array/json_reader_test.go index 74b451a46446d..43e6682427032 100644 --- a/go/arrow/array/json_reader_test.go +++ b/go/arrow/array/json_reader_test.go @@ -20,9 +20,9 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/list.go b/go/arrow/array/list.go index 4042b680fb789..07e38944348ac 100644 --- a/go/arrow/array/list.go +++ b/go/arrow/array/list.go @@ -22,13 +22,19 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/goccy/go-json" ) +type ListLike interface { + arrow.Array + ListValues() arrow.Array + ValueOffsets(i int) (start, end int64) +} + // List represents an immutable sequence of array values. type List struct { array @@ -146,28 +152,237 @@ func (a *List) Release() { a.values.Release() } -type ListBuilder struct { +func (a *List) ValueOffsets(i int) (start, end int64) { + debug.Assert(i >= 0 && i < a.array.data.length, "index out of range") + start, end = int64(a.offsets[i]), int64(a.offsets[i+1]) + return +} + +// LargeList represents an immutable sequence of array values. +type LargeList struct { + array + values arrow.Array + offsets []int64 +} + +// NewLargeListData returns a new LargeList array value, from data. 
+func NewLargeListData(data arrow.ArrayData) *LargeList { + a := new(LargeList) + a.refCount = 1 + a.setData(data.(*Data)) + return a +} + +func (a *LargeList) ListValues() arrow.Array { return a.values } + +func (a *LargeList) String() string { + o := new(strings.Builder) + o.WriteString("[") + for i := 0; i < a.Len(); i++ { + if i > 0 { + o.WriteString(" ") + } + if !a.IsValid(i) { + o.WriteString("(null)") + continue + } + sub := a.newListValue(i) + fmt.Fprintf(o, "%v", sub) + sub.Release() + } + o.WriteString("]") + return o.String() +} + +func (a *LargeList) newListValue(i int) arrow.Array { + j := i + a.array.data.offset + beg := int64(a.offsets[j]) + end := int64(a.offsets[j+1]) + return NewSlice(a.values, beg, end) +} + +func (a *LargeList) setData(data *Data) { + a.array.setData(data) + vals := data.buffers[1] + if vals != nil { + a.offsets = arrow.Int64Traits.CastFromBytes(vals.Bytes()) + } + a.values = MakeFromData(data.childData[0]) +} + +func (a *LargeList) getOneForMarshal(i int) interface{} { + if a.IsNull(i) { + return nil + } + + slice := a.newListValue(i) + defer slice.Release() + v, err := json.Marshal(slice) + if err != nil { + panic(err) + } + return json.RawMessage(v) +} + +func (a *LargeList) MarshalJSON() ([]byte, error) { + var buf bytes.Buffer + enc := json.NewEncoder(&buf) + + buf.WriteByte('[') + for i := 0; i < a.Len(); i++ { + if i != 0 { + buf.WriteByte(',') + } + if err := enc.Encode(a.getOneForMarshal(i)); err != nil { + return nil, err + } + } + buf.WriteByte(']') + return buf.Bytes(), nil +} + +func arrayEqualLargeList(left, right *LargeList) bool { + for i := 0; i < left.Len(); i++ { + if left.IsNull(i) { + continue + } + o := func() bool { + l := left.newListValue(i) + defer l.Release() + r := right.newListValue(i) + defer r.Release() + return Equal(l, r) + }() + if !o { + return false + } + } + return true +} + +// Len returns the number of elements in the array. +func (a *LargeList) Len() int { return a.array.Len() } + +func (a *LargeList) Offsets() []int64 { return a.offsets } + +func (a *LargeList) ValueOffsets(i int) (start, end int64) { + debug.Assert(i >= 0 && i < a.array.data.length, "index out of range") + start, end = a.offsets[i], a.offsets[i+1] + return +} + +func (a *LargeList) Retain() { + a.array.Retain() + a.values.Retain() +} + +func (a *LargeList) Release() { + a.array.Release() + a.values.Release() +} + +type baseListBuilder struct { builder - etype arrow.DataType // data type of the list's elements. - values Builder // value builder for the list's elements. - offsets *Int32Builder + values Builder // value builder for the list's elements. + offsets Builder + + // actual list type + dt arrow.DataType + appendOffsetVal func(int) +} + +type ListLikeBuilder interface { + Builder + ValueBuilder() Builder + Append(bool) +} + +type ListBuilder struct { + baseListBuilder +} + +type LargeListBuilder struct { + baseListBuilder } // NewListBuilder returns a builder, using the provided memory allocator. // The created list builder will create a list whose elements will be of type etype. 
func NewListBuilder(mem memory.Allocator, etype arrow.DataType) *ListBuilder { + offsetBldr := NewInt32Builder(mem) + return &ListBuilder{ + baseListBuilder{ + builder: builder{refCount: 1, mem: mem}, + values: NewBuilder(mem, etype), + offsets: offsetBldr, + dt: arrow.ListOf(etype), + appendOffsetVal: func(o int) { offsetBldr.Append(int32(o)) }, + }, + } +} + +// NewListBuilderWithField takes a field to use for the child rather than just +// a datatype to allow for more customization. +func NewListBuilderWithField(mem memory.Allocator, field arrow.Field) *ListBuilder { + offsetBldr := NewInt32Builder(mem) return &ListBuilder{ - builder: builder{refCount: 1, mem: mem}, - etype: etype, - values: NewBuilder(mem, etype), - offsets: NewInt32Builder(mem), + baseListBuilder{ + builder: builder{refCount: 1, mem: mem}, + values: NewBuilder(mem, field.Type), + offsets: offsetBldr, + dt: arrow.ListOfField(field), + appendOffsetVal: func(o int) { offsetBldr.Append(int32(o)) }, + }, + } +} + +func (b *baseListBuilder) Type() arrow.DataType { + switch dt := b.dt.(type) { + case *arrow.ListType: + f := dt.ElemField() + f.Type = b.values.Type() + return arrow.ListOfField(f) + case *arrow.LargeListType: + f := dt.ElemField() + f.Type = b.values.Type() + return arrow.LargeListOfField(f) + } + return nil +} + +// NewLargeListBuilder returns a builder, using the provided memory allocator. +// The created list builder will create a list whose elements will be of type etype. +func NewLargeListBuilder(mem memory.Allocator, etype arrow.DataType) *LargeListBuilder { + offsetBldr := NewInt64Builder(mem) + return &LargeListBuilder{ + baseListBuilder{ + builder: builder{refCount: 1, mem: mem}, + values: NewBuilder(mem, etype), + offsets: offsetBldr, + dt: arrow.LargeListOf(etype), + appendOffsetVal: func(o int) { offsetBldr.Append(int64(o)) }, + }, + } +} + +// NewLargeListBuilderWithField takes a field rather than just an element type +// to allow for more customization of the final type of the LargeList Array +func NewLargeListBuilderWithField(mem memory.Allocator, field arrow.Field) *LargeListBuilder { + offsetBldr := NewInt64Builder(mem) + return &LargeListBuilder{ + baseListBuilder{ + builder: builder{refCount: 1, mem: mem}, + values: NewBuilder(mem, field.Type), + offsets: offsetBldr, + dt: arrow.LargeListOfField(field), + appendOffsetVal: func(o int) { offsetBldr.Append(int64(o)) }, + }, } } // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. 
-func (b *ListBuilder) Release() { +func (b *baseListBuilder) Release() { debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") if atomic.AddInt64(&b.refCount, -1) == 0 { @@ -175,35 +390,45 @@ func (b *ListBuilder) Release() { b.nullBitmap.Release() b.nullBitmap = nil } + b.values.Release() + b.offsets.Release() } - b.values.Release() - b.offsets.Release() } -func (b *ListBuilder) appendNextOffset() { - b.offsets.Append(int32(b.values.Len())) +func (b *baseListBuilder) appendNextOffset() { + b.appendOffsetVal(b.values.Len()) } -func (b *ListBuilder) Append(v bool) { +func (b *baseListBuilder) Append(v bool) { b.Reserve(1) b.unsafeAppendBoolToBitmap(v) b.appendNextOffset() } -func (b *ListBuilder) AppendNull() { +func (b *baseListBuilder) AppendNull() { b.Reserve(1) b.unsafeAppendBoolToBitmap(false) b.appendNextOffset() } +func (b *baseListBuilder) AppendEmptyValue() { + b.Append(true) +} + func (b *ListBuilder) AppendValues(offsets []int32, valid []bool) { b.Reserve(len(valid)) - b.offsets.AppendValues(offsets, nil) + b.offsets.(*Int32Builder).AppendValues(offsets, nil) b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) } -func (b *ListBuilder) unsafeAppendBoolToBitmap(isValid bool) { +func (b *LargeListBuilder) AppendValues(offsets []int64, valid []bool) { + b.Reserve(len(valid)) + b.offsets.(*Int64Builder).AppendValues(offsets, nil) + b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) +} + +func (b *baseListBuilder) unsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) } else { @@ -212,26 +437,26 @@ func (b *ListBuilder) unsafeAppendBoolToBitmap(isValid bool) { b.length++ } -func (b *ListBuilder) init(capacity int) { +func (b *baseListBuilder) init(capacity int) { b.builder.init(capacity) b.offsets.init(capacity + 1) } // Reserve ensures there is enough space for appending n elements // by checking the capacity and calling Resize if necessary. -func (b *ListBuilder) Reserve(n int) { +func (b *baseListBuilder) Reserve(n int) { b.builder.reserve(n, b.resizeHelper) b.offsets.Reserve(n) } // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), // additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *ListBuilder) Resize(n int) { +func (b *baseListBuilder) Resize(n int) { b.resizeHelper(n) b.offsets.Resize(n) } -func (b *ListBuilder) resizeHelper(n int) { +func (b *baseListBuilder) resizeHelper(n int) { if n < minBuilderCapacity { n = minBuilderCapacity } @@ -243,7 +468,7 @@ func (b *ListBuilder) resizeHelper(n int) { } } -func (b *ListBuilder) ValueBuilder() Builder { +func (b *baseListBuilder) ValueBuilder() Builder { return b.values } @@ -253,31 +478,46 @@ func (b *ListBuilder) NewArray() arrow.Array { return b.NewListArray() } +// NewArray creates a LargeList array from the memory buffers used by the builder and resets the LargeListBuilder +// so it can be used to build a new array. +func (b *LargeListBuilder) NewArray() arrow.Array { + return b.NewLargeListArray() +} + // NewListArray creates a List array from the memory buffers used by the builder and resets the ListBuilder // so it can be used to build a new array. 
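To illustrate the new LargeList support (64-bit offsets) and the shared baseListBuilder behaviour introduced above, here is a short sketch that is not part of the patch; the element values are arbitrary:

```go
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func main() {
	mem := memory.NewGoAllocator()

	lb := array.NewLargeListBuilder(mem, arrow.PrimitiveTypes.Int32)
	defer lb.Release()
	vb := lb.ValueBuilder().(*array.Int32Builder)

	lb.Append(true) // start list 0
	vb.AppendValues([]int32{1, 2, 3}, nil)
	lb.AppendNull() // list 1 is null
	lb.Append(true) // start list 2
	vb.AppendValues([]int32{4, 5}, nil)

	arr := lb.NewLargeListArray()
	defer arr.Release()

	fmt.Println(arr)           // [[1 2 3] (null) [4 5]]
	fmt.Println(arr.Offsets()) // int64 offsets: [0 3 3 5]

	start, end := arr.ValueOffsets(2)
	fmt.Println(start, end) // 3 5
}
```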
func (b *ListBuilder) NewListArray() (a *List) { - if b.offsets.Len() != b.length+1 { - b.appendNextOffset() - } data := b.newData() a = NewListData(data) data.Release() return } -func (b *ListBuilder) newData() (data *Data) { +// NewLargeListArray creates a List array from the memory buffers used by the builder and resets the LargeListBuilder +// so it can be used to build a new array. +func (b *LargeListBuilder) NewLargeListArray() (a *LargeList) { + data := b.newData() + a = NewLargeListData(data) + data.Release() + return +} + +func (b *baseListBuilder) newData() (data *Data) { + if b.offsets.Len() != b.length+1 { + b.appendNextOffset() + } values := b.values.NewArray() defer values.Release() var offsets *memory.Buffer if b.offsets != nil { - arr := b.offsets.NewInt32Array() + arr := b.offsets.NewArray() defer arr.Release() offsets = arr.Data().Buffers()[1] } data = NewData( - arrow.ListOf(b.etype), b.length, + b.Type(), b.length, []*memory.Buffer{ b.nullBitmap, offsets, @@ -291,7 +531,7 @@ func (b *ListBuilder) newData() (data *Data) { return } -func (b *ListBuilder) unmarshalOne(dec *json.Decoder) error { +func (b *baseListBuilder) unmarshalOne(dec *json.Decoder) error { t, err := dec.Token() if err != nil { return err @@ -311,14 +551,14 @@ func (b *ListBuilder) unmarshalOne(dec *json.Decoder) error { default: return &json.UnmarshalTypeError{ Value: fmt.Sprint(t), - Struct: arrow.ListOf(b.etype).String(), + Struct: b.dt.String(), } } return nil } -func (b *ListBuilder) unmarshal(dec *json.Decoder) error { +func (b *baseListBuilder) unmarshal(dec *json.Decoder) error { for dec.More() { if err := b.unmarshalOne(dec); err != nil { return err @@ -327,7 +567,7 @@ func (b *ListBuilder) unmarshal(dec *json.Decoder) error { return nil } -func (b *ListBuilder) UnmarshalJSON(data []byte) error { +func (b *baseListBuilder) UnmarshalJSON(data []byte) error { dec := json.NewDecoder(bytes.NewReader(data)) t, err := dec.Token() if err != nil { @@ -343,5 +583,7 @@ func (b *ListBuilder) UnmarshalJSON(data []byte) error { var ( _ arrow.Array = (*List)(nil) + _ arrow.Array = (*LargeList)(nil) _ Builder = (*ListBuilder)(nil) + _ Builder = (*LargeListBuilder)(nil) ) diff --git a/go/arrow/array/list_test.go b/go/arrow/array/list_test.go index bbcde7c1e0685..eb09f655d7e52 100644 --- a/go/arrow/array/list_test.go +++ b/go/arrow/array/list_test.go @@ -20,194 +20,282 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" ) func TestListArray(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []int32{0, 1, 2, 3, 4, 5, 6} - lengths = []int{3, 0, 4} - isValid = []bool{true, false, true} - offsets = []int32{0, 3, 3, 7} - ) - - lb := array.NewListBuilder(pool, arrow.PrimitiveTypes.Int32) - defer lb.Release() - - for i := 0; i < 10; i++ { - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - pos := 0 - for i, length := range lengths { - lb.Append(isValid[i]) - for j := 0; j < length; j++ { - vb.Append(vs[pos]) - pos++ - } - } - - arr := lb.NewArray().(*array.List) - defer arr.Release() - - arr.Retain() - arr.Release() - - if got, want := arr.DataType().ID(), arrow.LIST; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := arr.Len(), len(isValid); got != 
want { - t.Fatalf("got=%d, want=%d", got, want) - } + tests := []struct { + typeID arrow.Type + offsets interface{} + dt arrow.DataType + }{ + {arrow.LIST, []int32{0, 3, 3, 3, 7}, arrow.ListOf(arrow.PrimitiveTypes.Int32)}, + {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)}, + {arrow.LIST, []int32{0, 3, 3, 3, 7}, arrow.ListOfField(arrow.Field{Name: "item", Type: arrow.PrimitiveTypes.Int32, Nullable: true})}, + {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, arrow.LargeListOfField(arrow.Field{Name: "item", Type: arrow.PrimitiveTypes.Int32, Nullable: true})}, + } - for i := range lengths { - if got, want := arr.IsValid(i), isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) + for _, tt := range tests { + t.Run(tt.typeID.String(), func(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + + var ( + vs = []int32{0, 1, 2, 3, 4, 5, 6} + lengths = []int{3, 0, 0, 4} + isValid = []bool{true, false, true, true} + ) + + lb := array.NewBuilder(pool, tt.dt).(array.ListLikeBuilder) + defer lb.Release() + + for i := 0; i < 10; i++ { + vb := lb.ValueBuilder().(*array.Int32Builder) + vb.Reserve(len(vs)) + + pos := 0 + for i, length := range lengths { + lb.Append(isValid[i]) + for j := 0; j < length; j++ { + vb.Append(vs[pos]) + pos++ + } + } + + arr := lb.NewArray().(array.ListLike) + defer arr.Release() + + arr.Retain() + arr.Release() + + if got, want := arr.DataType().ID(), tt.typeID; got != want { + t.Fatalf("got=%v, want=%v", got, want) + } + + if got, want := arr.Len(), len(isValid); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + for i := range lengths { + if got, want := arr.IsValid(i), isValid[i]; got != want { + t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) + } + if got, want := arr.IsNull(i), !isValid[i]; got != want { + t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) + } + } + + var got interface{} + switch tt.typeID { + case arrow.LIST: + arr := arr.(*array.List) + got = arr.Offsets() + case arrow.LARGE_LIST: + arr := arr.(*array.LargeList) + got = arr.Offsets() + } + + if !reflect.DeepEqual(got, tt.offsets) { + t.Fatalf("got=%v, want=%v", got, tt.offsets) + } + + varr := arr.ListValues().(*array.Int32) + if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } } - if got, want := arr.IsNull(i), lengths[i] == 0; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - if got, want := arr.Offsets(), offsets; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - varr := arr.ListValues().(*array.Int32) - if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } + }) } + } func TestListArrayEmpty(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - lb := array.NewListBuilder(pool, arrow.PrimitiveTypes.Int32) - defer lb.Release() - arr := lb.NewArray().(*array.List) - defer arr.Release() - if got, want := arr.Len(), 0; got != want { - t.Fatalf("got=%d, want=%d", got, want) + typ := []arrow.DataType{ + arrow.ListOf(arrow.PrimitiveTypes.Int32), + arrow.LargeListOf(arrow.PrimitiveTypes.Int32), + } + + for _, dt := range typ { + t.Run(dt.String(), func(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + + lb := array.NewBuilder(pool, dt) + defer lb.Release() + arr := 
lb.NewArray() + defer arr.Release() + if got, want := arr.Len(), 0; got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + }) } } func TestListArrayBulkAppend(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []int32{0, 1, 2, 3, 4, 5, 6} - lengths = []int{3, 0, 4} - isValid = []bool{true, false, true} - offsets = []int32{0, 3, 3, 7} - ) - - lb := array.NewListBuilder(pool, arrow.PrimitiveTypes.Int32) - defer lb.Release() - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - lb.AppendValues(offsets, isValid) - for _, v := range vs { - vb.Append(v) + tests := []struct { + typeID arrow.Type + offsets interface{} + dt arrow.DataType + }{ + {arrow.LIST, []int32{0, 3, 3, 3, 7}, arrow.ListOf(arrow.PrimitiveTypes.Int32)}, + {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)}, } - arr := lb.NewArray().(*array.List) - defer arr.Release() + for _, tt := range tests { + t.Run(tt.typeID.String(), func(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + + var ( + vs = []int32{0, 1, 2, 3, 4, 5, 6} + lengths = []int{3, 0, 0, 4} + isValid = []bool{true, false, true, true} + ) + + lb := array.NewBuilder(pool, tt.dt).(array.ListLikeBuilder) + defer lb.Release() + vb := lb.ValueBuilder().(*array.Int32Builder) + vb.Reserve(len(vs)) + + switch tt.typeID { + case arrow.LIST: + lb.(*array.ListBuilder).AppendValues(tt.offsets.([]int32), isValid) + case arrow.LARGE_LIST: + lb.(*array.LargeListBuilder).AppendValues(tt.offsets.([]int64), isValid) + } + for _, v := range vs { + vb.Append(v) + } - if got, want := arr.DataType().ID(), arrow.LIST; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } + arr := lb.NewArray().(array.ListLike) + defer arr.Release() - if got, want := arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } + if got, want := arr.DataType().ID(), tt.typeID; got != want { + t.Fatalf("got=%v, want=%v", got, want) + } - for i := range lengths { - if got, want := arr.IsValid(i), isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := arr.IsNull(i), lengths[i] == 0; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } + if got, want := arr.Len(), len(isValid); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } - if got, want := arr.Offsets(), offsets; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } + for i := range lengths { + if got, want := arr.IsValid(i), isValid[i]; got != want { + t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) + } + if got, want := arr.IsNull(i), !isValid[i]; got != want { + t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) + } + } + + var got interface{} + switch tt.typeID { + case arrow.LIST: + arr := arr.(*array.List) + got = arr.Offsets() + case arrow.LARGE_LIST: + arr := arr.(*array.LargeList) + got = arr.Offsets() + } - varr := arr.ListValues().(*array.Int32) - if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) + if !reflect.DeepEqual(got, tt.offsets) { + t.Fatalf("got=%v, want=%v", got, tt.offsets) + } + + varr := arr.ListValues().(*array.Int32) + if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + }) } } func TestListArraySlice(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) 
- defer pool.AssertSize(t, 0) - - var ( - vs = []int32{0, 1, 2, 3, 4, 5, 6} - lengths = []int{3, 0, 4} - isValid = []bool{true, false, true} - offsets = []int32{0, 3, 3, 7} - ) - - lb := array.NewListBuilder(pool, arrow.PrimitiveTypes.Int32) - defer lb.Release() - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - lb.AppendValues(offsets, isValid) - for _, v := range vs { - vb.Append(v) + tests := []struct { + typeID arrow.Type + offsets interface{} + dt arrow.DataType + }{ + {arrow.LIST, []int32{0, 3, 3, 3, 7}, arrow.ListOf(arrow.PrimitiveTypes.Int32)}, + {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)}, } - arr := lb.NewArray().(*array.List) - defer arr.Release() + for _, tt := range tests { + t.Run(tt.typeID.String(), func(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + + var ( + vs = []int32{0, 1, 2, 3, 4, 5, 6} + lengths = []int{3, 0, 0, 4} + isValid = []bool{true, false, true, true} + ) + + lb := array.NewBuilder(pool, tt.dt).(array.ListLikeBuilder) + defer lb.Release() + vb := lb.ValueBuilder().(*array.Int32Builder) + vb.Reserve(len(vs)) + + switch tt.typeID { + case arrow.LIST: + lb.(*array.ListBuilder).AppendValues(tt.offsets.([]int32), isValid) + case arrow.LARGE_LIST: + lb.(*array.LargeListBuilder).AppendValues(tt.offsets.([]int64), isValid) + } + for _, v := range vs { + vb.Append(v) + } - if got, want := arr.DataType().ID(), arrow.LIST; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } + arr := lb.NewArray().(array.ListLike) + defer arr.Release() - if got, want := arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } + if got, want := arr.DataType().ID(), tt.typeID; got != want { + t.Fatalf("got=%v, want=%v", got, want) + } - for i := range lengths { - if got, want := arr.IsValid(i), isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := arr.IsNull(i), lengths[i] == 0; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } + if got, want := arr.Len(), len(isValid); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } - if got, want := arr.Offsets(), offsets; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } + for i := range lengths { + if got, want := arr.IsValid(i), isValid[i]; got != want { + t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) + } + if got, want := arr.IsNull(i), !isValid[i]; got != want { + t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) + } + } - varr := arr.ListValues().(*array.Int32) - if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } + var got interface{} + switch tt.typeID { + case arrow.LIST: + arr := arr.(*array.List) + got = arr.Offsets() + case arrow.LARGE_LIST: + arr := arr.(*array.LargeList) + got = arr.Offsets() + } - if got, want := arr.String(), `[[0 1 2] (null) [3 4 5 6]]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } + if !reflect.DeepEqual(got, tt.offsets) { + t.Fatalf("got=%v, want=%v", got, tt.offsets) + } + + varr := arr.ListValues().(*array.Int32) + if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } - sub := array.NewSlice(arr, 1, 3).(*array.List) - defer sub.Release() + if got, want := arr.String(), `[[0 1 2] (null) [] [3 4 5 6]]`; got != want { + t.Fatalf("got=%q, want=%q", got, want) + } + + sub := array.NewSlice(arr, 1, 
4).(array.ListLike) + defer sub.Release() - if got, want := sub.String(), `[(null) [3 4 5 6]]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) + if got, want := sub.String(), `[(null) [] [3 4 5 6]]`; got != want { + t.Fatalf("got=%q, want=%q", got, want) + } + }) } } diff --git a/go/arrow/array/map.go b/go/arrow/array/map.go index e9a1f2a2aa90e..9c35fbe3ca270 100644 --- a/go/arrow/array/map.go +++ b/go/arrow/array/map.go @@ -20,8 +20,8 @@ import ( "bytes" "fmt" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/goccy/go-json" ) @@ -165,6 +165,8 @@ func NewMapBuilder(mem memory.Allocator, keytype, itemtype arrow.DataType, keysS } } +func (b *MapBuilder) Type() arrow.DataType { return b.etype } + // Retain increases the reference count by 1 for the sub-builders (list, key, item). // Retain may be called simultaneously from multiple goroutines. func (b *MapBuilder) Retain() { @@ -202,6 +204,10 @@ func (b *MapBuilder) AppendNull() { b.Append(false) } +func (b *MapBuilder) AppendEmptyValue() { + b.Append(true) +} + // Reserve enough space for n maps func (b *MapBuilder) Reserve(n int) { b.listBuilder.Reserve(n) } diff --git a/go/arrow/array/map_test.go b/go/arrow/array/map_test.go index 1d8219a3519fe..059b9c60361a1 100644 --- a/go/arrow/array/map_test.go +++ b/go/arrow/array/map_test.go @@ -19,9 +19,9 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/null.go b/go/arrow/array/null.go index d405c65fac242..ddc5cf3833e61 100644 --- a/go/arrow/array/null.go +++ b/go/arrow/array/null.go @@ -23,9 +23,9 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/goccy/go-json" ) @@ -94,6 +94,8 @@ func NewNullBuilder(mem memory.Allocator) *NullBuilder { return &NullBuilder{builder: builder{refCount: 1, mem: mem}} } +func (b *NullBuilder) Type() arrow.DataType { return arrow.Null } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. 
func (b *NullBuilder) Release() { @@ -112,6 +114,8 @@ func (b *NullBuilder) AppendNull() { b.builder.nulls++ } +func (b *NullBuilder) AppendEmptyValue() { b.AppendNull() } + func (*NullBuilder) Reserve(size int) {} func (*NullBuilder) Resize(size int) {} diff --git a/go/arrow/array/null_test.go b/go/arrow/array/null_test.go index 951542b635f3a..9165249f73076 100644 --- a/go/arrow/array/null_test.go +++ b/go/arrow/array/null_test.go @@ -19,9 +19,9 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" ) func TestNullArray(t *testing.T) { diff --git a/go/arrow/array/numeric.gen.go b/go/arrow/array/numeric.gen.go index b073a3c9a2183..d850a113b2e5b 100644 --- a/go/arrow/array/numeric.gen.go +++ b/go/arrow/array/numeric.gen.go @@ -22,7 +22,7 @@ import ( "fmt" "strings" - "github.com/apache/arrow/go/v9/arrow" + "github.com/apache/arrow/go/v10/arrow" "github.com/goccy/go-json" ) diff --git a/go/arrow/array/numeric.gen.go.tmpl b/go/arrow/array/numeric.gen.go.tmpl index ccf37d11ce0b9..b6b7c11e4ea57 100644 --- a/go/arrow/array/numeric.gen.go.tmpl +++ b/go/arrow/array/numeric.gen.go.tmpl @@ -21,7 +21,7 @@ import ( "strings" "time" - "github.com/apache/arrow/go/v9/arrow" + "github.com/apache/arrow/go/v10/arrow" "github.com/goccy/go-json" ) diff --git a/go/arrow/array/numeric_test.go b/go/arrow/array/numeric_test.go index ffc26bac82cce..5333b4424bfa6 100644 --- a/go/arrow/array/numeric_test.go +++ b/go/arrow/array/numeric_test.go @@ -20,9 +20,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/numericbuilder.gen.go b/go/arrow/array/numericbuilder.gen.go index da944aad3e7ef..ed71d8223866f 100644 --- a/go/arrow/array/numericbuilder.gen.go +++ b/go/arrow/array/numericbuilder.gen.go @@ -27,10 +27,10 @@ import ( "sync/atomic" "time" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/goccy/go-json" ) @@ -45,6 +45,8 @@ func NewInt64Builder(mem memory.Allocator) *Int64Builder { return &Int64Builder{builder: builder{refCount: 1, mem: mem}} } +func (b *Int64Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Int64 } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. 
func (b *Int64Builder) Release() { @@ -73,6 +75,10 @@ func (b *Int64Builder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *Int64Builder) AppendEmptyValue() { + b.Append(0) +} + func (b *Int64Builder) UnsafeAppend(v int64) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v @@ -248,6 +254,8 @@ func NewUint64Builder(mem memory.Allocator) *Uint64Builder { return &Uint64Builder{builder: builder{refCount: 1, mem: mem}} } +func (b *Uint64Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Uint64 } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *Uint64Builder) Release() { @@ -276,6 +284,10 @@ func (b *Uint64Builder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *Uint64Builder) AppendEmptyValue() { + b.Append(0) +} + func (b *Uint64Builder) UnsafeAppend(v uint64) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v @@ -451,6 +463,8 @@ func NewFloat64Builder(mem memory.Allocator) *Float64Builder { return &Float64Builder{builder: builder{refCount: 1, mem: mem}} } +func (b *Float64Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Float64 } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *Float64Builder) Release() { @@ -479,6 +493,10 @@ func (b *Float64Builder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *Float64Builder) AppendEmptyValue() { + b.Append(0) +} + func (b *Float64Builder) UnsafeAppend(v float64) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v @@ -654,6 +672,8 @@ func NewInt32Builder(mem memory.Allocator) *Int32Builder { return &Int32Builder{builder: builder{refCount: 1, mem: mem}} } +func (b *Int32Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Int32 } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *Int32Builder) Release() { @@ -682,6 +702,10 @@ func (b *Int32Builder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *Int32Builder) AppendEmptyValue() { + b.Append(0) +} + func (b *Int32Builder) UnsafeAppend(v int32) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v @@ -857,6 +881,8 @@ func NewUint32Builder(mem memory.Allocator) *Uint32Builder { return &Uint32Builder{builder: builder{refCount: 1, mem: mem}} } +func (b *Uint32Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Uint32 } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *Uint32Builder) Release() { @@ -885,6 +911,10 @@ func (b *Uint32Builder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *Uint32Builder) AppendEmptyValue() { + b.Append(0) +} + func (b *Uint32Builder) UnsafeAppend(v uint32) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v @@ -1060,6 +1090,8 @@ func NewFloat32Builder(mem memory.Allocator) *Float32Builder { return &Float32Builder{builder: builder{refCount: 1, mem: mem}} } +func (b *Float32Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Float32 } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. 
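// A minimal sketch (not part of this patch) of the generated primitive-builder
// additions: AppendEmptyValue appends a valid zero value (in contrast to AppendNull),
// and Type reports the builder's data type. Assumes the v10 module paths and the
// existing NewInt64Array method.
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func main() {
	bldr := array.NewInt64Builder(memory.NewGoAllocator())
	defer bldr.Release()

	fmt.Println(bldr.Type()) // int64

	bldr.Append(42)
	bldr.AppendNull()       // slot 1: null
	bldr.AppendEmptyValue() // slot 2: valid zero

	arr := bldr.NewInt64Array()
	defer arr.Release()

	fmt.Println(arr)           // [42 (null) 0]
	fmt.Println(arr.IsNull(2)) // false: empty values are valid
}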
func (b *Float32Builder) Release() { @@ -1088,6 +1120,10 @@ func (b *Float32Builder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *Float32Builder) AppendEmptyValue() { + b.Append(0) +} + func (b *Float32Builder) UnsafeAppend(v float32) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v @@ -1263,6 +1299,8 @@ func NewInt16Builder(mem memory.Allocator) *Int16Builder { return &Int16Builder{builder: builder{refCount: 1, mem: mem}} } +func (b *Int16Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Int16 } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *Int16Builder) Release() { @@ -1291,6 +1329,10 @@ func (b *Int16Builder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *Int16Builder) AppendEmptyValue() { + b.Append(0) +} + func (b *Int16Builder) UnsafeAppend(v int16) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v @@ -1466,6 +1508,8 @@ func NewUint16Builder(mem memory.Allocator) *Uint16Builder { return &Uint16Builder{builder: builder{refCount: 1, mem: mem}} } +func (b *Uint16Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Uint16 } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *Uint16Builder) Release() { @@ -1494,6 +1538,10 @@ func (b *Uint16Builder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *Uint16Builder) AppendEmptyValue() { + b.Append(0) +} + func (b *Uint16Builder) UnsafeAppend(v uint16) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v @@ -1669,6 +1717,8 @@ func NewInt8Builder(mem memory.Allocator) *Int8Builder { return &Int8Builder{builder: builder{refCount: 1, mem: mem}} } +func (b *Int8Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Int8 } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *Int8Builder) Release() { @@ -1697,6 +1747,10 @@ func (b *Int8Builder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *Int8Builder) AppendEmptyValue() { + b.Append(0) +} + func (b *Int8Builder) UnsafeAppend(v int8) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v @@ -1872,6 +1926,8 @@ func NewUint8Builder(mem memory.Allocator) *Uint8Builder { return &Uint8Builder{builder: builder{refCount: 1, mem: mem}} } +func (b *Uint8Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Uint8 } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *Uint8Builder) Release() { @@ -1900,6 +1956,10 @@ func (b *Uint8Builder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *Uint8Builder) AppendEmptyValue() { + b.Append(0) +} + func (b *Uint8Builder) UnsafeAppend(v uint8) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v @@ -2076,6 +2136,8 @@ func NewTimestampBuilder(mem memory.Allocator, dtype *arrow.TimestampType) *Time return &TimestampBuilder{builder: builder{refCount: 1, mem: mem}, dtype: dtype} } +func (b *TimestampBuilder) Type() arrow.DataType { return b.dtype } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. 
func (b *TimestampBuilder) Release() { @@ -2104,6 +2166,10 @@ func (b *TimestampBuilder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *TimestampBuilder) AppendEmptyValue() { + b.Append(0) +} + func (b *TimestampBuilder) UnsafeAppend(v arrow.Timestamp) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v @@ -2282,6 +2348,8 @@ func NewTime32Builder(mem memory.Allocator, dtype *arrow.Time32Type) *Time32Buil return &Time32Builder{builder: builder{refCount: 1, mem: mem}, dtype: dtype} } +func (b *Time32Builder) Type() arrow.DataType { return b.dtype } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *Time32Builder) Release() { @@ -2310,6 +2378,10 @@ func (b *Time32Builder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *Time32Builder) AppendEmptyValue() { + b.Append(0) +} + func (b *Time32Builder) UnsafeAppend(v arrow.Time32) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v @@ -2487,6 +2559,8 @@ func NewTime64Builder(mem memory.Allocator, dtype *arrow.Time64Type) *Time64Buil return &Time64Builder{builder: builder{refCount: 1, mem: mem}, dtype: dtype} } +func (b *Time64Builder) Type() arrow.DataType { return b.dtype } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *Time64Builder) Release() { @@ -2515,6 +2589,10 @@ func (b *Time64Builder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *Time64Builder) AppendEmptyValue() { + b.Append(0) +} + func (b *Time64Builder) UnsafeAppend(v arrow.Time64) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v @@ -2691,6 +2769,8 @@ func NewDate32Builder(mem memory.Allocator) *Date32Builder { return &Date32Builder{builder: builder{refCount: 1, mem: mem}} } +func (b *Date32Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Date32 } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *Date32Builder) Release() { @@ -2719,6 +2799,10 @@ func (b *Date32Builder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *Date32Builder) AppendEmptyValue() { + b.Append(0) +} + func (b *Date32Builder) UnsafeAppend(v arrow.Date32) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v @@ -2894,6 +2978,8 @@ func NewDate64Builder(mem memory.Allocator) *Date64Builder { return &Date64Builder{builder: builder{refCount: 1, mem: mem}} } +func (b *Date64Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Date64 } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *Date64Builder) Release() { @@ -2922,6 +3008,10 @@ func (b *Date64Builder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *Date64Builder) AppendEmptyValue() { + b.Append(0) +} + func (b *Date64Builder) UnsafeAppend(v arrow.Date64) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v @@ -3098,6 +3188,8 @@ func NewDurationBuilder(mem memory.Allocator, dtype *arrow.DurationType) *Durati return &DurationBuilder{builder: builder{refCount: 1, mem: mem}, dtype: dtype} } +func (b *DurationBuilder) Type() arrow.DataType { return b.dtype } + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. 
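// A minimal sketch (not part of this patch) showing that the parameterized builders
// (Timestamp, Time32/64, Duration) hand back their configured dtype through the new
// Type method. Assumes the v10 module paths and the existing NewTimestampArray method.
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func main() {
	dtype := &arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: "UTC"}
	bldr := array.NewTimestampBuilder(memory.NewGoAllocator(), dtype)
	defer bldr.Release()

	fmt.Println(bldr.Type() == dtype) // true: the builder reports its own dtype

	bldr.Append(arrow.Timestamp(1))
	bldr.AppendEmptyValue() // a valid zero timestamp

	arr := bldr.NewTimestampArray()
	defer arr.Release()
	fmt.Println(arr.Len(), arr.NullN()) // 2 0
}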
func (b *DurationBuilder) Release() { @@ -3126,6 +3218,10 @@ func (b *DurationBuilder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *DurationBuilder) AppendEmptyValue() { + b.Append(0) +} + func (b *DurationBuilder) UnsafeAppend(v arrow.Duration) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v diff --git a/go/arrow/array/numericbuilder.gen.go.tmpl b/go/arrow/array/numericbuilder.gen.go.tmpl index 1f67a1b6bc9dc..f912c076fe3a6 100644 --- a/go/arrow/array/numericbuilder.gen.go.tmpl +++ b/go/arrow/array/numericbuilder.gen.go.tmpl @@ -17,10 +17,10 @@ package array import ( - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/goccy/go-json" ) @@ -40,10 +40,15 @@ type {{.Name}}Builder struct { func New{{.Name}}Builder(mem memory.Allocator, dtype *arrow.{{.Name}}Type) *{{.Name}}Builder { return &{{.Name}}Builder{builder: builder{refCount:1, mem: mem}, dtype: dtype} } + +func (b *{{.Name}}Builder) Type() arrow.DataType { return b.dtype } + {{else}} func New{{.Name}}Builder(mem memory.Allocator) *{{.Name}}Builder { return &{{.Name}}Builder{builder: builder{refCount:1, mem: mem}} } + +func (b *{{.Name}}Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.{{.Name}} } {{end}} // Release decreases the reference count by 1. @@ -74,6 +79,10 @@ func (b *{{.Name}}Builder) AppendNull() { b.UnsafeAppendBoolToBitmap(false) } +func (b *{{.Name}}Builder) AppendEmptyValue() { + b.Append(0) +} + func (b *{{.Name}}Builder) UnsafeAppend(v {{or .QualifiedType .Type}}) { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) b.rawData[b.length] = v diff --git a/go/arrow/array/numericbuilder.gen_test.go b/go/arrow/array/numericbuilder.gen_test.go index 4b3d50457eec0..08119743dac4c 100644 --- a/go/arrow/array/numericbuilder.gen_test.go +++ b/go/arrow/array/numericbuilder.gen_test.go @@ -21,9 +21,9 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/numericbuilder.gen_test.go.tmpl b/go/arrow/array/numericbuilder.gen_test.go.tmpl index 382f7f0894dbc..582f9642f551a 100644 --- a/go/arrow/array/numericbuilder.gen_test.go.tmpl +++ b/go/arrow/array/numericbuilder.gen_test.go.tmpl @@ -19,9 +19,9 @@ package array_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/record.go b/go/arrow/array/record.go index ff773fb30195e..1c122c28f028b 100644 --- a/go/arrow/array/record.go +++ b/go/arrow/array/record.go @@ -22,9 +22,9 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" 
+ "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/goccy/go-json" ) @@ -153,6 +153,10 @@ func NewRecord(schema *arrow.Schema, cols []arrow.Array, nrows int64) *simpleRec } func (rec *simpleRecord) validate() error { + if rec.rows == 0 && len(rec.arrs) == 0 { + return nil + } + if len(rec.arrs) != len(rec.schema.Fields()) { return fmt.Errorf("arrow/array: number of columns/fields mismatch") } diff --git a/go/arrow/array/record_test.go b/go/arrow/array/record_test.go index 2250f49044a71..5deeb27853b73 100644 --- a/go/arrow/array/record_test.go +++ b/go/arrow/array/record_test.go @@ -21,9 +21,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) @@ -135,8 +135,7 @@ func TestRecord(t *testing.T) { { schema: schema, cols: nil, - rows: -1, - err: fmt.Errorf("arrow/array: number of columns/fields mismatch"), + rows: 0, }, { schema: schema, diff --git a/go/arrow/array/string.go b/go/arrow/array/string.go index 237ea0166b361..5cb73a79b5dbf 100644 --- a/go/arrow/array/string.go +++ b/go/arrow/array/string.go @@ -23,8 +23,8 @@ import ( "strings" "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/goccy/go-json" ) @@ -62,6 +62,10 @@ func (a *String) ValueOffset(i int) int { return int(a.offsets[i+a.array.data.offset]) } +func (a *String) ValueOffset64(i int) int64 { + return int64(a.ValueOffset(i)) +} + func (a *String) ValueOffsets() []int32 { beg := a.array.data.offset end := beg + a.array.data.length + 1 @@ -159,88 +163,171 @@ func arrayEqualString(left, right *String) bool { return true } -// A StringBuilder is used to build a String array using the Append methods. -type StringBuilder struct { - builder *BinaryBuilder +// String represents an immutable sequence of variable-length UTF-8 strings. +type LargeString struct { + array + offsets []int64 + values string } -// NewStringBuilder creates a new StringBuilder. -func NewStringBuilder(mem memory.Allocator) *StringBuilder { - b := &StringBuilder{ - builder: NewBinaryBuilder(mem, arrow.BinaryTypes.String), +// NewStringData constructs a new String array from data. +func NewLargeStringData(data arrow.ArrayData) *LargeString { + a := &LargeString{} + a.refCount = 1 + a.setData(data.(*Data)) + return a +} + +// Reset resets the String with a different set of Data. +func (a *LargeString) Reset(data arrow.ArrayData) { + a.setData(data.(*Data)) +} + +// Value returns the slice at index i. This value should not be mutated. +func (a *LargeString) Value(i int) string { + i = i + a.array.data.offset + return a.values[a.offsets[i]:a.offsets[i+1]] +} + +// ValueOffset returns the offset of the value at index i. +func (a *LargeString) ValueOffset(i int) int64 { + if i < 0 || i > a.array.data.length { + panic("arrow/array: index out of range") } - return b + return a.offsets[i+a.array.data.offset] } -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. 
-func (b *StringBuilder) Release() { - b.builder.Release() +func (a *LargeString) ValueOffset64(i int) int64 { + return a.ValueOffset(i) } -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. -func (b *StringBuilder) Retain() { - b.builder.Retain() +func (a *LargeString) ValueOffsets() []int64 { + beg := a.array.data.offset + end := beg + a.array.data.length + 1 + return a.offsets[beg:end] } -// -// Len returns the number of elements in the array builder. -func (b *StringBuilder) Len() int { return b.builder.Len() } +func (a *LargeString) ValueBytes() (ret []byte) { + beg := a.array.data.offset + end := beg + a.array.data.length + data := a.values[a.offsets[beg]:a.offsets[end]] + + s := (*reflect.SliceHeader)(unsafe.Pointer(&ret)) + s.Data = (*reflect.StringHeader)(unsafe.Pointer(&data)).Data + s.Len = len(data) + s.Cap = len(data) + return +} + +func (a *LargeString) String() string { + o := new(strings.Builder) + o.WriteString("[") + for i := 0; i < a.Len(); i++ { + if i > 0 { + o.WriteString(" ") + } + switch { + case a.IsNull(i): + o.WriteString("(null)") + default: + fmt.Fprintf(o, "%q", a.Value(i)) + } + } + o.WriteString("]") + return o.String() +} -// Cap returns the total number of elements that can be stored without allocating additional memory. -func (b *StringBuilder) Cap() int { return b.builder.Cap() } +func (a *LargeString) setData(data *Data) { + if len(data.buffers) != 3 { + panic("arrow/array: len(data.buffers) != 3") + } -// NullN returns the number of null values in the array builder. -func (b *StringBuilder) NullN() int { return b.builder.NullN() } + a.array.setData(data) -// Append appends a string to the builder. -func (b *StringBuilder) Append(v string) { - b.builder.Append([]byte(v)) + if vdata := data.buffers[2]; vdata != nil { + b := vdata.Bytes() + a.values = *(*string)(unsafe.Pointer(&b)) + } + + if offsets := data.buffers[1]; offsets != nil { + a.offsets = arrow.Int64Traits.CastFromBytes(offsets.Bytes()) + } + + if a.array.data.length < 1 { + return + } + + expNumOffsets := a.array.data.offset + a.array.data.length + 1 + if len(a.offsets) < expNumOffsets { + panic(fmt.Errorf("arrow/array: string offset buffer must have at least %d values", expNumOffsets)) + } + + if int(a.offsets[expNumOffsets-1]) > len(a.values) { + panic("arrow/array: string offsets out of bounds of data buffer") + } } -// AppendNull appends a null to the builder. -func (b *StringBuilder) AppendNull() { - b.builder.AppendNull() +func (a *LargeString) getOneForMarshal(i int) interface{} { + if a.IsValid(i) { + return a.Value(i) + } + return nil } -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. -func (b *StringBuilder) AppendValues(v []string, valid []bool) { - b.builder.AppendStringValues(v, valid) +func (a *LargeString) MarshalJSON() ([]byte, error) { + vals := make([]interface{}, a.Len()) + for i := 0; i < a.Len(); i++ { + if a.IsValid(i) { + vals[i] = a.Value(i) + } else { + vals[i] = nil + } + } + return json.Marshal(vals) } -// Value returns the string at index i. 
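// A minimal sketch (not part of this patch) of the new LargeString array, whose
// offsets are 64-bit rather than 32-bit. It is built with the LargeStringBuilder
// introduced further down in this file; import paths assume the v10 module.
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func main() {
	bldr := array.NewLargeStringBuilder(memory.NewGoAllocator())
	defer bldr.Release()

	bldr.AppendValues([]string{"hello", "world"}, nil)
	bldr.AppendNull()

	arr := bldr.NewLargeStringArray()
	defer arr.Release()

	fmt.Println(arr.DataType().ID() == arrow.LARGE_STRING) // true
	fmt.Println(arr.Value(1))                              // world
	fmt.Println(arr.ValueOffsets())                        // [0 5 10 10] as int64 offsets
	fmt.Println(arr.ValueOffset64(2))                      // 10
}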
-func (b *StringBuilder) Value(i int) string { - return string(b.builder.Value(i)) +func arrayEqualLargeString(left, right *LargeString) bool { + for i := 0; i < left.Len(); i++ { + if left.IsNull(i) { + continue + } + if left.Value(i) != right.Value(i) { + return false + } + } + return true } -func (b *StringBuilder) init(capacity int) { - b.builder.init(capacity) +// A StringBuilder is used to build a String array using the Append methods. +type StringBuilder struct { + *BinaryBuilder } -func (b *StringBuilder) resize(newBits int, init func(int)) { - b.builder.resize(newBits, init) +// NewStringBuilder creates a new StringBuilder. +func NewStringBuilder(mem memory.Allocator) *StringBuilder { + b := &StringBuilder{ + BinaryBuilder: NewBinaryBuilder(mem, arrow.BinaryTypes.String), + } + return b } -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *StringBuilder) Reserve(n int) { - b.builder.Reserve(n) +func (b *StringBuilder) Type() arrow.DataType { return arrow.BinaryTypes.String } + +// Append appends a string to the builder. +func (b *StringBuilder) Append(v string) { + b.BinaryBuilder.Append([]byte(v)) } -// ReserveData ensures there is enough space for appending n bytes -// by checking the capacity and resizing the data buffer if necessary. -func (b *StringBuilder) ReserveData(n int) { - b.builder.ReserveData(n) +// AppendValues will append the values in the v slice. The valid slice determines which values +// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, +// all values in v are appended and considered valid. +func (b *StringBuilder) AppendValues(v []string, valid []bool) { + b.BinaryBuilder.AppendStringValues(v, valid) } -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *StringBuilder) Resize(n int) { - b.builder.Resize(n) +// Value returns the string at index i. +func (b *StringBuilder) Value(i int) string { + return string(b.BinaryBuilder.Value(i)) } // NewArray creates a String array from the memory buffers used by the builder and resets the StringBuilder @@ -252,7 +339,7 @@ func (b *StringBuilder) NewArray() arrow.Array { // NewStringArray creates a String array from the memory buffers used by the builder and resets the StringBuilder // so it can be used to build a new array. func (b *StringBuilder) NewStringArray() (a *String) { - data := b.builder.newData() + data := b.newData() a = NewStringData(data) data.Release() return @@ -302,7 +389,102 @@ func (b *StringBuilder) UnmarshalJSON(data []byte) error { return b.unmarshal(dec) } +// A LargeStringBuilder is used to build a LargeString array using the Append methods. +// LargeString is for when you need the offset buffer to be 64-bit integers +// instead of 32-bit integers. +type LargeStringBuilder struct { + *BinaryBuilder +} + +// NewStringBuilder creates a new StringBuilder. +func NewLargeStringBuilder(mem memory.Allocator) *LargeStringBuilder { + b := &LargeStringBuilder{ + BinaryBuilder: NewBinaryBuilder(mem, arrow.BinaryTypes.LargeString), + } + return b +} + +func (b *LargeStringBuilder) Type() arrow.DataType { return arrow.BinaryTypes.LargeString } + +// Append appends a string to the builder. +func (b *LargeStringBuilder) Append(v string) { + b.BinaryBuilder.Append([]byte(v)) +} + +// AppendValues will append the values in the v slice. 
The valid slice determines which values +// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, +// all values in v are appended and considered valid. +func (b *LargeStringBuilder) AppendValues(v []string, valid []bool) { + b.BinaryBuilder.AppendStringValues(v, valid) +} + +// Value returns the string at index i. +func (b *LargeStringBuilder) Value(i int) string { + return string(b.BinaryBuilder.Value(i)) +} + +// NewArray creates a String array from the memory buffers used by the builder and resets the StringBuilder +// so it can be used to build a new array. +func (b *LargeStringBuilder) NewArray() arrow.Array { + return b.NewLargeStringArray() +} + +// NewStringArray creates a String array from the memory buffers used by the builder and resets the StringBuilder +// so it can be used to build a new array. +func (b *LargeStringBuilder) NewLargeStringArray() (a *LargeString) { + data := b.newData() + a = NewLargeStringData(data) + data.Release() + return +} + +func (b *LargeStringBuilder) unmarshalOne(dec *json.Decoder) error { + t, err := dec.Token() + if err != nil { + return err + } + + switch v := t.(type) { + case nil: + b.AppendNull() + case string: + b.Append(v) + default: + return &json.UnmarshalTypeError{ + Value: fmt.Sprint(v), + Type: reflect.TypeOf(string("")), + Offset: dec.InputOffset(), + } + } + return nil +} + +func (b *LargeStringBuilder) unmarshal(dec *json.Decoder) error { + for dec.More() { + if err := b.unmarshalOne(dec); err != nil { + return err + } + } + return nil +} + +func (b *LargeStringBuilder) UnmarshalJSON(data []byte) error { + dec := json.NewDecoder(bytes.NewReader(data)) + t, err := dec.Token() + if err != nil { + return err + } + + if delim, ok := t.(json.Delim); !ok || delim != '[' { + return fmt.Errorf("string builder must unpack from json array, found %s", delim) + } + + return b.unmarshal(dec) +} + var ( _ arrow.Array = (*String)(nil) + _ arrow.Array = (*LargeString)(nil) _ Builder = (*StringBuilder)(nil) + _ Builder = (*LargeStringBuilder)(nil) ) diff --git a/go/arrow/array/string_test.go b/go/arrow/array/string_test.go index f0e0325772740..9632204d633d0 100644 --- a/go/arrow/array/string_test.go +++ b/go/arrow/array/string_test.go @@ -21,10 +21,10 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) @@ -277,3 +277,253 @@ func TestStringInvalidOffsets(t *testing.T) { array.NewStringData(array.NewData(arrow.BinaryTypes.String, 1, buffers, nil, 0, 2)) }, "data has offset and value offset is overflowing") } + +func TestLargeStringArray(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + var ( + want = []string{"hello", "世界", "", "bye"} + valids = []bool{true, true, false, true} + offsets = []int64{0, 5, 11, 11, 14} + ) + + sb := array.NewLargeStringBuilder(mem) + defer sb.Release() + + sb.Retain() + sb.Release() + + sb.AppendValues(want[:2], nil) + + sb.AppendNull() + sb.Append(want[3]) + + if got, want := sb.Len(), len(want); got != want { + t.Fatalf("invalid len: got=%d, want=%d", got, want) + } + + if got, want := sb.NullN(), 1; got != want { + t.Fatalf("invalid 
nulls: got=%d, want=%d", got, want) + } + + arr := sb.NewLargeStringArray() + defer arr.Release() + + arr.Retain() + arr.Release() + + if got, want := arr.Len(), len(want); got != want { + t.Fatalf("invalid len: got=%d, want=%d", got, want) + } + + if got, want := arr.NullN(), 1; got != want { + t.Fatalf("invalid nulls: got=%d, want=%d", got, want) + } + + for i := range want { + if arr.IsNull(i) != !valids[i] { + t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i]) + } + switch { + case arr.IsNull(i): + default: + got := arr.Value(i) + if got != want[i] { + t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i]) + } + } + + if got, want := arr.ValueOffset(i), offsets[i]; got != want { + t.Fatalf("arr-offset-beg[%d]: got=%d, want=%d", i, got, want) + } + if got, want := arr.ValueOffset(i+1), offsets[i+1]; got != want { + t.Fatalf("arr-offset-end[%d]: got=%d, want=%d", i+1, got, want) + } + } + + if !reflect.DeepEqual(offsets, arr.ValueOffsets()) { + t.Fatalf("ValueOffsets got=%v, want=%v", arr.ValueOffsets(), offsets) + } + + sub := array.MakeFromData(arr.Data()) + defer sub.Release() + + if sub.DataType().ID() != arrow.LARGE_STRING { + t.Fatalf("invalid type: got=%q, want=large_string", sub.DataType().Name()) + } + + if _, ok := sub.(*array.LargeString); !ok { + t.Fatalf("could not type-assert to array.LargeString") + } + + if got, want := arr.String(), `["hello" "世界" (null) "bye"]`; got != want { + t.Fatalf("got=%q, want=%q", got, want) + } + + if !bytes.Equal([]byte(`hello世界bye`), arr.ValueBytes()) { + t.Fatalf("got=%q, want=%q", string(arr.ValueBytes()), `hello世界bye`) + } + + slice := array.NewSliceData(arr.Data(), 2, 4) + defer slice.Release() + + sub1 := array.MakeFromData(slice) + defer sub1.Release() + + v, ok := sub1.(*array.LargeString) + if !ok { + t.Fatalf("could not type-assert to array.LargeString") + } + + if got, want := v.String(), `[(null) "bye"]`; got != want { + t.Fatalf("got=%q, want=%q", got, want) + } + + if !bytes.Equal(v.ValueBytes(), []byte("bye")) { + t.Fatalf("got=%q, want=%q", string(v.ValueBytes()), "bye") + } + + for i := 0; i < v.Len(); i++ { + if got, want := v.ValueOffset(0), offsets[i+slice.Offset()]; got != want { + t.Fatalf("val-offset-with-offset[%d]: got=%q, want=%q", i, got, want) + } + } + + if !reflect.DeepEqual(offsets[2:5], v.ValueOffsets()) { + t.Fatalf("ValueOffsets got=%v, want=%v", v.ValueOffsets(), offsets[2:5]) + } +} + +func TestLargeStringBuilder_Empty(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + want := []string{"hello", "世界", "", "bye"} + + ab := array.NewLargeStringBuilder(mem) + defer ab.Release() + + stringValues := func(a *array.LargeString) []string { + vs := make([]string, a.Len()) + for i := range vs { + vs[i] = a.Value(i) + } + return vs + } + + ab.AppendValues([]string{}, nil) + a := ab.NewLargeStringArray() + assert.Zero(t, a.Len()) + a.Release() + + ab.AppendValues(nil, nil) + a = ab.NewLargeStringArray() + assert.Zero(t, a.Len()) + a.Release() + + ab.AppendValues([]string{}, nil) + ab.AppendValues(want, nil) + a = ab.NewLargeStringArray() + assert.Equal(t, want, stringValues(a)) + a.Release() + + ab.AppendValues(want, nil) + ab.AppendValues([]string{}, nil) + a = ab.NewLargeStringArray() + assert.Equal(t, want, stringValues(a)) + a.Release() +} + +// TestStringReset tests the Reset() method on the String type by creating two different Strings and then +// reseting the contents of string2 with the values from string1. 
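// A minimal sketch (not part of this patch) of the LargeStringBuilder JSON support
// shown above: the builder unpacks a JSON array of strings and nulls, and the
// resulting array marshals back to the same shape. Assumes the v10 module paths.
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func main() {
	bldr := array.NewLargeStringBuilder(memory.NewGoAllocator())
	defer bldr.Release()

	if err := bldr.UnmarshalJSON([]byte(`["foo", null, "bar"]`)); err != nil {
		panic(err)
	}

	arr := bldr.NewLargeStringArray()
	defer arr.Release()

	fmt.Println(arr) // ["foo" (null) "bar"]

	out, _ := arr.MarshalJSON()
	fmt.Println(string(out)) // ["foo",null,"bar"]
}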
+func TestLargeStringReset(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + sb1 := array.NewLargeStringBuilder(mem) + sb2 := array.NewLargeStringBuilder(mem) + defer sb1.Release() + defer sb2.Release() + + sb1.Append("string1") + sb1.AppendNull() + + var ( + string1 = sb1.NewLargeStringArray() + string2 = sb2.NewLargeStringArray() + + string1Data = string1.Data() + ) + string2.Reset(string1Data) + + assert.Equal(t, "string1", string2.Value(0)) +} + +func TestLargeStringInvalidOffsets(t *testing.T) { + const expectedPanic = "arrow/array: string offsets out of bounds of data buffer" + + makeBuffers := func(valids []bool, offsets []int64, data string) []*memory.Buffer { + offsetBuf := memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(offsets)) + var nullBufBytes []byte + var nullBuf *memory.Buffer + if valids != nil { + nullBufBytes = make([]byte, bitutil.BytesForBits(int64(len(valids)))) + for i, v := range valids { + bitutil.SetBitTo(nullBufBytes, i, v) + } + nullBuf = memory.NewBufferBytes(nullBufBytes) + } + return []*memory.Buffer{nullBuf, offsetBuf, memory.NewBufferBytes([]byte(data))} + } + + assert.NotPanics(t, func() { + buffers := makeBuffers(nil, []int64{}, "") + array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 0, buffers, nil, 0, 0)) + }, "empty array with no offsets") + + assert.NotPanics(t, func() { + buffers := makeBuffers(nil, []int64{0, 5}, "") + array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 0, buffers, nil, 0, 0)) + }, "empty array, offsets ignored") + + assert.NotPanics(t, func() { + buffers := makeBuffers(nil, []int64{0, 3, 4, 9}, "oooabcdef") + array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 1, buffers, nil, 0, 2)) + }, "data has offset and value offsets are valid") + + assert.NotPanics(t, func() { + buffers := makeBuffers(nil, []int64{0, 3, 6, 9, 9}, "012345678") + arr := array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 4, buffers, nil, 0, 0)) + if assert.Equal(t, 4, arr.Len()) && assert.Zero(t, arr.NullN()) { + assert.Equal(t, "012", arr.Value(0)) + assert.Equal(t, "345", arr.Value(1)) + assert.Equal(t, "678", arr.Value(2)) + assert.Equal(t, "", arr.Value(3), "trailing empty string value will have offset past end") + } + }, "simple valid case") + + assert.NotPanics(t, func() { + buffers := makeBuffers([]bool{true, false, true, false}, []int64{0, 3, 4, 9, 9}, "oooabcdef") + arr := array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 4, buffers, nil, 2, 0)) + if assert.Equal(t, 4, arr.Len()) && assert.Equal(t, 2, arr.NullN()) { + assert.Equal(t, "ooo", arr.Value(0)) + assert.True(t, arr.IsNull(1)) + assert.Equal(t, "bcdef", arr.Value(2)) + assert.True(t, arr.IsNull(3)) + } + }, "simple valid case with nulls") + + assert.PanicsWithValue(t, expectedPanic, func() { + buffers := makeBuffers(nil, []int64{0, 5}, "abc") + array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 1, buffers, nil, 0, 0)) + }, "last offset is overflowing") + + assert.PanicsWithError(t, "arrow/array: string offset buffer must have at least 2 values", func() { + buffers := makeBuffers(nil, []int64{0}, "abc") + array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 1, buffers, nil, 0, 0)) + }, "last offset is missing") + + assert.PanicsWithValue(t, expectedPanic, func() { + buffers := makeBuffers(nil, []int64{0, 3, 10, 15}, "oooabcdef") + array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 1, buffers, nil, 0, 2)) + }, 
"data has offset and value offset is overflowing") +} diff --git a/go/arrow/array/struct.go b/go/arrow/array/struct.go index 052fabb7fd7b6..2adf17623c0cc 100644 --- a/go/arrow/array/struct.go +++ b/go/arrow/array/struct.go @@ -23,10 +23,10 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/goccy/go-json" ) @@ -181,6 +181,15 @@ func NewStructBuilder(mem memory.Allocator, dtype *arrow.StructType) *StructBuil return b } +func (b *StructBuilder) Type() arrow.DataType { + fields := make([]arrow.Field, len(b.fields)) + copy(fields, b.dtype.(*arrow.StructType).Fields()) + for i, b := range b.fields { + fields[i].Type = b.Type() + } + return arrow.StructOf(fields...) +} + // Release decreases the reference count by 1. // When the reference count goes to zero, the memory is freed. func (b *StructBuilder) Release() { @@ -215,6 +224,13 @@ func (b *StructBuilder) AppendValues(valids []bool) { func (b *StructBuilder) AppendNull() { b.Append(false) } +func (b *StructBuilder) AppendEmptyValue() { + b.Append(true) + for _, f := range b.fields { + f.AppendEmptyValue() + } +} + func (b *StructBuilder) unsafeAppendBoolToBitmap(isValid bool) { if isValid { bitutil.SetBit(b.nullBitmap.Bytes(), b.length) @@ -276,7 +292,7 @@ func (b *StructBuilder) NewStructArray() (a *Struct) { return } -func (b *StructBuilder) newData() (data arrow.ArrayData) { +func (b *StructBuilder) newData() (data *Data) { fields := make([]arrow.ArrayData, len(b.fields)) for i, f := range b.fields { arr := f.NewArray() @@ -285,7 +301,7 @@ func (b *StructBuilder) newData() (data arrow.ArrayData) { } data = NewData( - b.dtype, b.length, + b.Type(), b.length, []*memory.Buffer{ b.nullBitmap, }, diff --git a/go/arrow/array/struct_test.go b/go/arrow/array/struct_test.go index ca4b5ef904d53..7bef10f9f877f 100644 --- a/go/arrow/array/struct_test.go +++ b/go/arrow/array/struct_test.go @@ -20,9 +20,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" ) func TestStructArray(t *testing.T) { diff --git a/go/arrow/array/table.go b/go/arrow/array/table.go index be0728110bb45..0e17fd565a53a 100644 --- a/go/arrow/array/table.go +++ b/go/arrow/array/table.go @@ -22,8 +22,8 @@ import ( "math" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/internal/debug" ) // NewColumnSlice returns a new zero-copy slice of the column with the indicated diff --git a/go/arrow/array/table_test.go b/go/arrow/array/table_test.go index 9536c3b62d09f..4230f0790c4c6 100644 --- a/go/arrow/array/table_test.go +++ b/go/arrow/array/table_test.go @@ -21,9 +21,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + 
"github.com/apache/arrow/go/v10/arrow/memory" ) func TestChunked(t *testing.T) { diff --git a/go/arrow/array/union.go b/go/arrow/array/union.go new file mode 100644 index 0000000000000..da15f63b336d7 --- /dev/null +++ b/go/arrow/array/union.go @@ -0,0 +1,1308 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package array + +import ( + "bytes" + "errors" + "fmt" + "math" + "reflect" + "strings" + "sync/atomic" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/bitutils" + "github.com/goccy/go-json" +) + +// Union is a convenience interface to encompass both Sparse and Dense +// union array types. +type Union interface { + arrow.Array + // NumFields returns the number of child fields in this union. + // Equivalent to len(UnionType().Fields()) + NumFields() int + // Validate returns an error if there are any issues with the lengths + // or types of the children arrays mismatching with the Type of the + // Union Array. nil is returned if there are no problems. + Validate() error + // ValidateFull runs the same checks that Validate() does, but additionally + // checks that all childIDs are valid (>= 0 || ==InvalidID) and for + // dense unions validates that all offsets are within the bounds of their + // respective child. + ValidateFull() error + // TypeCodes returns the type id buffer for the union Array, equivalent to + // Data().Buffers()[1]. Note: This will not account for any slice offset. + TypeCodes() *memory.Buffer + // RawTypeCodes returns a slice of UnionTypeCodes properly accounting for + // any slice offset. + RawTypeCodes() []arrow.UnionTypeCode + // TypeCode returns the logical type code of the value at the requested index + TypeCode(i int) arrow.UnionTypeCode + // ChildID returns the index of the physical child containing the value + // at the requested index. Equivalent to: + // + // arr.UnionType().ChildIDs()[arr.RawTypeCodes()[i+arr.Data().Offset()]] + ChildID(i int) int + // UnionType is a convenience function to retrieve the properly typed UnionType + // instead of having to call DataType() and manually assert the type. + UnionType() arrow.UnionType + // Mode returns the union mode of the underlying Array, either arrow.SparseMode + // or arrow.DenseMode. + Mode() arrow.UnionMode + // Field returns the requested child array for this union. Returns nil if a + // non-existent position is passed in. 
+ // + // The appropriate child for an index can be retrieved with Field(ChildID(index)) + Field(pos int) arrow.Array +} + +const kMaxElems = math.MaxInt32 + +type union struct { + array + + unionType arrow.UnionType + typecodes []arrow.UnionTypeCode + + children []arrow.Array +} + +func (a *union) Retain() { + a.array.Retain() + for _, c := range a.children { + c.Retain() + } +} + +func (a *union) Release() { + a.array.Release() + for _, c := range a.children { + c.Release() + } +} + +func (a *union) NumFields() int { return len(a.unionType.Fields()) } + +func (a *union) Mode() arrow.UnionMode { return a.unionType.Mode() } + +func (a *union) UnionType() arrow.UnionType { return a.unionType } + +func (a *union) TypeCodes() *memory.Buffer { + return a.data.buffers[1] +} + +func (a *union) RawTypeCodes() []arrow.UnionTypeCode { + if a.data.length > 0 { + return a.typecodes[a.data.offset:] + } + return []arrow.UnionTypeCode{} +} + +func (a *union) TypeCode(i int) arrow.UnionTypeCode { + return a.typecodes[i+a.data.offset] +} + +func (a *union) ChildID(i int) int { + return a.unionType.ChildIDs()[a.typecodes[i+a.data.offset]] +} + +func (a *union) setData(data *Data) { + a.unionType = data.dtype.(arrow.UnionType) + debug.Assert(len(data.buffers) >= 2, "arrow/array: invalid number of union array buffers") + + if data.length > 0 { + a.typecodes = arrow.Int8Traits.CastFromBytes(data.buffers[1].Bytes()) + } else { + a.typecodes = []int8{} + } + a.children = make([]arrow.Array, len(data.childData)) + for i, child := range data.childData { + if a.unionType.Mode() == arrow.SparseMode && (data.offset != 0 || child.Len() != data.length) { + child = NewSliceData(child, int64(data.offset), int64(data.offset+data.length)) + defer child.Release() + } + a.children[i] = MakeFromData(child) + } + a.array.setData(data) +} + +func (a *union) Field(pos int) (result arrow.Array) { + if pos < 0 || pos >= len(a.children) { + return nil + } + + return a.children[pos] +} + +func (a *union) Validate() error { + fields := a.unionType.Fields() + for i, f := range fields { + fieldData := a.data.childData[i] + if a.unionType.Mode() == arrow.SparseMode && fieldData.Len() < a.data.length+a.data.offset { + return fmt.Errorf("arrow/array: sparse union child array #%d has length smaller than expected for union array (%d < %d)", + i, fieldData.Len(), a.data.length+a.data.offset) + } + + if !arrow.TypeEqual(f.Type, fieldData.DataType()) { + return fmt.Errorf("arrow/array: union child array #%d does not match type field %s vs %s", + i, fieldData.DataType(), f.Type) + } + } + return nil +} + +func (a *union) ValidateFull() error { + if err := a.Validate(); err != nil { + return err + } + + childIDs := a.unionType.ChildIDs() + codesMap := a.unionType.TypeCodes() + codes := a.RawTypeCodes() + + for i := 0; i < a.data.length; i++ { + code := codes[i] + if code < 0 || childIDs[code] == arrow.InvalidUnionChildID { + return fmt.Errorf("arrow/array: union value at position %d has invalid type id %d", i, code) + } + } + + if a.unionType.Mode() == arrow.DenseMode { + // validate offsets + + // map logical typeid to child length + var childLengths [256]int64 + for i := range a.unionType.Fields() { + childLengths[codesMap[i]] = int64(a.data.childData[i].Len()) + } + + // check offsets are in bounds + var lastOffsets [256]int64 + offsets := arrow.Int32Traits.CastFromBytes(a.data.buffers[2].Bytes())[a.data.offset:] + for i := int64(0); i < int64(a.data.length); i++ { + code := codes[i] + offset := offsets[i] + switch { + case offset < 0: + 
return fmt.Errorf("arrow/array: union value at position %d has negative offset %d", i, offset) + case offset >= int32(childLengths[code]): + return fmt.Errorf("arrow/array: union value at position %d has offset larger than child length (%d >= %d)", + i, offset, childLengths[code]) + case offset < int32(lastOffsets[code]): + return fmt.Errorf("arrow/array: union value at position %d has non-monotonic offset %d", i, offset) + } + lastOffsets[code] = int64(offset) + } + } + + return nil +} + +// SparseUnion represents an array where each logical value is taken from +// a single child. A buffer of 8-bit type ids indicates which child a given +// logical value is to be taken from. This is represented as the ChildID, +// which is the index into the list of children. +// +// In a sparse union, each child array will have the same length as the +// union array itself, regardless of how many values in the union actually +// refer to it. +// +// Unlike most other arrays, unions do not have a top-level validity bitmap. +type SparseUnion struct { + union +} + +// NewSparseUnion constructs a union array using the given type, length, list of +// children and buffer of typeIDs with the given offset. +func NewSparseUnion(dt *arrow.SparseUnionType, length int, children []arrow.Array, typeIDs *memory.Buffer, offset int) *SparseUnion { + childData := make([]arrow.ArrayData, len(children)) + for i, c := range children { + childData[i] = c.Data() + } + data := NewData(dt, length, []*memory.Buffer{nil, typeIDs}, childData, 0, offset) + defer data.Release() + return NewSparseUnionData(data) +} + +// NewSparseUnionData constructs a SparseUnion array from the given ArrayData object. +func NewSparseUnionData(data arrow.ArrayData) *SparseUnion { + a := &SparseUnion{} + a.refCount = 1 + a.setData(data.(*Data)) + return a +} + +// NewSparseUnionFromArrays constructs a new SparseUnion array with the provided +// values. +// +// typeIDs *must* be an INT8 array with no nulls +// len(codes) *must* be either 0 or equal to len(children). If len(codes) is 0, +// the type codes used will be sequentially numeric starting at 0. +func NewSparseUnionFromArrays(typeIDs arrow.Array, children []arrow.Array, codes ...arrow.UnionTypeCode) (*SparseUnion, error) { + return NewSparseUnionFromArraysWithFieldCodes(typeIDs, children, []string{}, codes) +} + +// NewSparseUnionFromArrayWithFields constructs a new SparseUnion array like +// NewSparseUnionFromArrays, but allows specifying the field names. Type codes +// will be auto-generated sequentially starting at 0. +// +// typeIDs *must* be an INT8 array with no nulls. +// len(fields) *must* either be 0 or equal to len(children). If len(fields) is 0, +// then the fields will be named sequentially starting at "0". +func NewSparseUnionFromArraysWithFields(typeIDs arrow.Array, children []arrow.Array, fields []string) (*SparseUnion, error) { + return NewSparseUnionFromArraysWithFieldCodes(typeIDs, children, fields, []arrow.UnionTypeCode{}) +} + +// NewSparseUnionFromArraysWithFieldCodes combines the other constructors +// for constructing a new SparseUnion array with the provided field names +// and type codes, along with children and type ids. +// +// All the requirements mentioned in NewSparseUnionFromArrays and +// NewSparseUnionFromArraysWithFields apply. 
+func NewSparseUnionFromArraysWithFieldCodes(typeIDs arrow.Array, children []arrow.Array, fields []string, codes []arrow.UnionTypeCode) (*SparseUnion, error) { + switch { + case typeIDs.DataType().ID() != arrow.INT8: + return nil, errors.New("arrow/array: union array type ids must be signed int8") + case typeIDs.NullN() != 0: + return nil, errors.New("arrow/array: union type ids may not have nulls") + case len(fields) > 0 && len(fields) != len(children): + return nil, errors.New("arrow/array: field names must have the same length as children") + case len(codes) > 0 && len(codes) != len(children): + return nil, errors.New("arrow/array: type codes must have same length as children") + } + + buffers := []*memory.Buffer{nil, typeIDs.Data().Buffers()[1]} + ty := arrow.SparseUnionFromArrays(children, fields, codes) + + childData := make([]arrow.ArrayData, len(children)) + for i, c := range children { + childData[i] = c.Data() + if c.Len() != typeIDs.Len() { + return nil, errors.New("arrow/array: sparse union array must have len(child) == len(typeids) for all children") + } + } + + data := NewData(ty, typeIDs.Len(), buffers, childData, 0, typeIDs.Data().Offset()) + defer data.Release() + return NewSparseUnionData(data), nil +} + +func (a *SparseUnion) setData(data *Data) { + a.union.setData(data) + debug.Assert(a.data.dtype.ID() == arrow.SPARSE_UNION, "arrow/array: invalid data type for SparseUnion") + debug.Assert(len(a.data.buffers) == 2, "arrow/array: sparse unions should have exactly 2 buffers") + debug.Assert(a.data.buffers[0] == nil, "arrow/array: validity bitmap for sparse unions should be nil") +} + +func (a *SparseUnion) getOneForMarshal(i int) interface{} { + typeID := a.RawTypeCodes()[i] + + childID := a.ChildID(i) + data := a.Field(childID) + + if data.IsNull(i) { + return nil + } + + return []interface{}{typeID, data.(arraymarshal).getOneForMarshal(i)} +} + +func (a *SparseUnion) MarshalJSON() ([]byte, error) { + var buf bytes.Buffer + enc := json.NewEncoder(&buf) + + buf.WriteByte('[') + for i := 0; i < a.Len(); i++ { + if i != 0 { + buf.WriteByte(',') + } + if err := enc.Encode(a.getOneForMarshal(i)); err != nil { + return nil, err + } + } + buf.WriteByte(']') + return buf.Bytes(), nil +} + +func (a *SparseUnion) String() string { + var b strings.Builder + b.WriteByte('[') + + fieldList := a.unionType.Fields() + for i := 0; i < a.Len(); i++ { + if i > 0 { + b.WriteString(" ") + } + + field := fieldList[a.ChildID(i)] + f := a.Field(a.ChildID(i)) + fmt.Fprintf(&b, "{%s=%v}", field.Name, f.(arraymarshal).getOneForMarshal(i)) + } + b.WriteByte(']') + return b.String() +} + +// GetFlattenedField returns a child array, adjusting its validity bitmap +// where the union array type codes don't match. +// +// ie: the returned array will have a null in every index that it is +// not referenced by union. 
+func (a *SparseUnion) GetFlattenedField(mem memory.Allocator, index int) (arrow.Array, error) { + if index < 0 || index >= a.NumFields() { + return nil, fmt.Errorf("arrow/array: index out of range: %d", index) + } + + childData := a.data.childData[index] + if a.data.offset != 0 || a.data.length != childData.Len() { + childData = NewSliceData(childData, int64(a.data.offset), int64(a.data.offset+a.data.length)) + // NewSliceData doesn't break the slice reference for buffers + // since we're going to replace the null bitmap buffer we need to break the + // slice reference so that we don't affect a.children's references + newBufs := make([]*memory.Buffer, len(childData.Buffers())) + copy(newBufs, childData.(*Data).buffers) + childData.(*Data).buffers = newBufs + } else { + childData = childData.(*Data).Copy() + } + defer childData.Release() + + // synthesize a null bitmap based on the union discriminant + // make sure the bitmap has extra bits corresponding to the child's offset + flattenedNullBitmap := memory.NewResizableBuffer(mem) + flattenedNullBitmap.Resize(childData.Len() + childData.Offset()) + + var ( + childNullBitmap = childData.Buffers()[0] + childOffset = childData.Offset() + typeCode = a.unionType.TypeCodes()[index] + codes = a.RawTypeCodes() + offset int64 = 0 + ) + bitutils.GenerateBitsUnrolled(flattenedNullBitmap.Bytes(), int64(childOffset), int64(a.data.length), + func() bool { + b := codes[offset] == typeCode + offset++ + return b + }) + + if childNullBitmap != nil { + defer childNullBitmap.Release() + bitutil.BitmapAnd(flattenedNullBitmap.Bytes(), childNullBitmap.Bytes(), + int64(childOffset), int64(childOffset), flattenedNullBitmap.Bytes(), + int64(childOffset), int64(childData.Len())) + } + childData.(*Data).buffers[0] = flattenedNullBitmap + childData.(*Data).nulls = childData.Len() - bitutil.CountSetBits(flattenedNullBitmap.Bytes(), childOffset, childData.Len()) + return MakeFromData(childData), nil +} + +func arraySparseUnionEqual(l, r *SparseUnion) bool { + childIDs := l.unionType.ChildIDs() + leftCodes, rightCodes := l.RawTypeCodes(), r.RawTypeCodes() + + for i := 0; i < l.data.length; i++ { + typeID := leftCodes[i] + if typeID != rightCodes[i] { + return false + } + + childNum := childIDs[typeID] + eq := SliceEqual(l.children[childNum], int64(i), int64(i+1), + r.children[childNum], int64(i), int64(i+1)) + if !eq { + return false + } + } + return true +} + +func arraySparseUnionApproxEqual(l, r *SparseUnion, opt equalOption) bool { + childIDs := l.unionType.ChildIDs() + leftCodes, rightCodes := l.RawTypeCodes(), r.RawTypeCodes() + + for i := 0; i < l.data.length; i++ { + typeID := leftCodes[i] + if typeID != rightCodes[i] { + return false + } + + childNum := childIDs[typeID] + eq := sliceApproxEqual(l.children[childNum], int64(i+l.data.offset), int64(i+l.data.offset+1), + r.children[childNum], int64(i+r.data.offset), int64(i+r.data.offset+1), opt) + if !eq { + return false + } + } + return true +} + +// DenseUnion represents an array where each logical value is taken from +// a single child, at a specific offset. A buffer of 8-bit type ids +// indicates which child a given logical value is to be taken from and +// a buffer of 32-bit offsets indicating which physical position in the +// given child array has the logical value for that index. +// +// Unlike a sparse union, a dense union allows encoding only the child values +// which are actually referred to by the union array. 
This is counterbalanced +// by the additional footprint of the offsets buffer, and the additional +// indirection cost when looking up values. +// +// Unlike most other arrays, unions do not have a top-level validity bitmap. +type DenseUnion struct { + union + offsets []int32 +} + +// NewDenseUnion constructs a union array using the given type, length, list of +// children and buffers of typeIDs and offsets, with the given array offset. +func NewDenseUnion(dt *arrow.DenseUnionType, length int, children []arrow.Array, typeIDs, valueOffsets *memory.Buffer, offset int) *DenseUnion { + childData := make([]arrow.ArrayData, len(children)) + for i, c := range children { + childData[i] = c.Data() + } + + data := NewData(dt, length, []*memory.Buffer{nil, typeIDs, valueOffsets}, childData, 0, offset) + defer data.Release() + return NewDenseUnionData(data) +} + +// NewDenseUnionData constructs a DenseUnion array from the given ArrayData object. +func NewDenseUnionData(data arrow.ArrayData) *DenseUnion { + a := &DenseUnion{} + a.refCount = 1 + a.setData(data.(*Data)) + return a +} + +// NewDenseUnionFromArrays constructs a new DenseUnion array with the provided +// values. +// +// typeIDs *must* be an INT8 array with no nulls +// offsets *must* be an INT32 array with no nulls +// len(codes) *must* be either 0 or equal to len(children). If len(codes) is 0, +// the type codes used will be sequentially numeric starting at 0. +func NewDenseUnionFromArrays(typeIDs, offsets arrow.Array, children []arrow.Array, codes ...arrow.UnionTypeCode) (*DenseUnion, error) { + return NewDenseUnionFromArraysWithFieldCodes(typeIDs, offsets, children, []string{}, codes) +} + +// NewDenseUnionFromArrayWithFields constructs a new DenseUnion array like +// NewDenseUnionFromArrays, but allows specifying the field names. Type codes +// will be auto-generated sequentially starting at 0. +// +// typeIDs *must* be an INT8 array with no nulls. +// offsets *must* be an INT32 array with no nulls. +// len(fields) *must* either be 0 or equal to len(children). If len(fields) is 0, +// then the fields will be named sequentially starting at "0". +func NewDenseUnionFromArraysWithFields(typeIDs, offsets arrow.Array, children []arrow.Array, fields []string) (*DenseUnion, error) { + return NewDenseUnionFromArraysWithFieldCodes(typeIDs, offsets, children, fields, []arrow.UnionTypeCode{}) +} + +// NewDenseUnionFromArraysWithFieldCodes combines the other constructors +// for constructing a new DenseUnion array with the provided field names +// and type codes, along with children and type ids. +// +// All the requirements mentioned in NewDenseUnionFromArrays and +// NewDenseUnionFromArraysWithFields apply. 
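// A minimal sketch (not part of this patch) of building a dense union with the
// constructors above: unlike the sparse form, each child holds only the values that
// are actually referenced, and a 32-bit offset per slot locates the value inside its
// child. Assumes the v10 module paths and the standard Int64/String/Int8/Int32 builders.
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func main() {
	mem := memory.NewGoAllocator()

	ints := array.NewInt64Builder(mem)
	defer ints.Release()
	ints.AppendValues([]int64{1, 3}, nil) // only the referenced values
	intArr := ints.NewInt64Array()
	defer intArr.Release()

	strs := array.NewStringBuilder(mem)
	defer strs.Release()
	strs.Append("two")
	strArr := strs.NewStringArray()
	defer strArr.Release()

	ids := array.NewInt8Builder(mem)
	defer ids.Release()
	ids.AppendValues([]int8{0, 1, 0}, nil)
	idArr := ids.NewInt8Array()
	defer idArr.Release()

	offs := array.NewInt32Builder(mem)
	defer offs.Release()
	offs.AppendValues([]int32{0, 0, 1}, nil) // position of each slot within its child
	offArr := offs.NewInt32Array()
	defer offArr.Release()

	u, err := array.NewDenseUnionFromArraysWithFields(
		idArr, offArr, []arrow.Array{intArr, strArr}, []string{"i", "s"})
	if err != nil {
		panic(err)
	}
	defer u.Release()

	fmt.Println(u)                // [{i=1} {s=two} {i=3}]
	fmt.Println(u.ValueOffset(2)) // 1: the third slot is the second entry of the int child
}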
+func NewDenseUnionFromArraysWithFieldCodes(typeIDs, offsets arrow.Array, children []arrow.Array, fields []string, codes []arrow.UnionTypeCode) (*DenseUnion, error) { + switch { + case offsets.DataType().ID() != arrow.INT32: + return nil, errors.New("arrow/array: union offsets must be signed int32") + case typeIDs.DataType().ID() != arrow.INT8: + return nil, errors.New("arrow/array: union type_ids must be signed int8") + case typeIDs.NullN() != 0: + return nil, errors.New("arrow/array: union typeIDs may not have nulls") + case offsets.NullN() != 0: + return nil, errors.New("arrow/array: nulls are not allowed in offsets for NewDenseUnionFromArrays*") + case len(fields) > 0 && len(fields) != len(children): + return nil, errors.New("arrow/array: fields must be the same length as children") + case len(codes) > 0 && len(codes) != len(children): + return nil, errors.New("arrow/array: typecodes must have the same length as children") + } + + ty := arrow.DenseUnionFromArrays(children, fields, codes) + buffers := []*memory.Buffer{nil, typeIDs.Data().Buffers()[1], offsets.Data().Buffers()[1]} + + childData := make([]arrow.ArrayData, len(children)) + for i, c := range children { + childData[i] = c.Data() + } + + data := NewData(ty, typeIDs.Len(), buffers, childData, 0, typeIDs.Data().Offset()) + defer data.Release() + return NewDenseUnionData(data), nil +} + +func (a *DenseUnion) ValueOffsets() *memory.Buffer { return a.data.buffers[2] } + +func (a *DenseUnion) ValueOffset(i int) int32 { return a.offsets[i+a.data.offset] } + +func (a *DenseUnion) RawValueOffsets() []int32 { return a.offsets[a.data.offset:] } + +func (a *DenseUnion) setData(data *Data) { + a.union.setData(data) + debug.Assert(a.data.dtype.ID() == arrow.DENSE_UNION, "arrow/array: invalid data type for DenseUnion") + debug.Assert(len(a.data.buffers) == 3, "arrow/array: dense unions should have exactly 3 buffers") + debug.Assert(a.data.buffers[0] == nil, "arrow/array: validity bitmap for dense unions should be nil") + + if data.length > 0 { + a.offsets = arrow.Int32Traits.CastFromBytes(a.data.buffers[2].Bytes()) + } else { + a.offsets = []int32{} + } +} + +func (a *DenseUnion) getOneForMarshal(i int) interface{} { + typeID := a.RawTypeCodes()[i] + + childID := a.ChildID(i) + data := a.Field(childID) + + offsets := a.RawValueOffsets() + if data.IsNull(int(offsets[i])) { + return nil + } + + return []interface{}{typeID, data.(arraymarshal).getOneForMarshal(int(offsets[i]))} +} + +func (a *DenseUnion) MarshalJSON() ([]byte, error) { + var buf bytes.Buffer + enc := json.NewEncoder(&buf) + + buf.WriteByte('[') + for i := 0; i < a.Len(); i++ { + if i != 0 { + buf.WriteByte(',') + } + if err := enc.Encode(a.getOneForMarshal(i)); err != nil { + return nil, err + } + } + buf.WriteByte(']') + return buf.Bytes(), nil +} + +func (a *DenseUnion) String() string { + var b strings.Builder + b.WriteByte('[') + + offsets := a.RawValueOffsets() + + fieldList := a.unionType.Fields() + for i := 0; i < a.Len(); i++ { + if i > 0 { + b.WriteString(" ") + } + + field := fieldList[a.ChildID(i)] + f := a.Field(a.ChildID(i)) + fmt.Fprintf(&b, "{%s=%v}", field.Name, f.(arraymarshal).getOneForMarshal(int(offsets[i]))) + } + b.WriteByte(']') + return b.String() +} + +func arrayDenseUnionEqual(l, r *DenseUnion) bool { + childIDs := l.unionType.ChildIDs() + leftCodes, rightCodes := l.RawTypeCodes(), r.RawTypeCodes() + leftOffsets, rightOffsets := l.RawValueOffsets(), r.RawValueOffsets() + + for i := 0; i < l.data.length; i++ { + typeID := leftCodes[i] + if typeID != 
rightCodes[i] { + return false + } + + childNum := childIDs[typeID] + eq := SliceEqual(l.children[childNum], int64(leftOffsets[i]), int64(leftOffsets[i]+1), + r.children[childNum], int64(rightOffsets[i]), int64(rightOffsets[i]+1)) + if !eq { + return false + } + } + return true +} + +func arrayDenseUnionApproxEqual(l, r *DenseUnion, opt equalOption) bool { + childIDs := l.unionType.ChildIDs() + leftCodes, rightCodes := l.RawTypeCodes(), r.RawTypeCodes() + leftOffsets, rightOffsets := l.RawValueOffsets(), r.RawValueOffsets() + + for i := 0; i < l.data.length; i++ { + typeID := leftCodes[i] + if typeID != rightCodes[i] { + return false + } + + childNum := childIDs[typeID] + eq := sliceApproxEqual(l.children[childNum], int64(leftOffsets[i]), int64(leftOffsets[i]+1), + r.children[childNum], int64(rightOffsets[i]), int64(rightOffsets[i]+1), opt) + if !eq { + return false + } + } + return true +} + +// UnionBuilder is a convenience interface for building Union arrays of +// either Dense or Sparse mode. +type UnionBuilder interface { + Builder + // AppendNulls appends n nulls to the array + AppendNulls(n int) + // AppendEmptyValues appends n empty zero values to the array + AppendEmptyValues(n int) + // AppendChild allows constructing the union type on the fly by making new + // new array builder available to the union builder. The type code (index) + // of the new child is returned, which should be passed to the Append method + // when adding a new element to the union array. + AppendChild(newChild Builder, fieldName string) (newCode arrow.UnionTypeCode) + // Append adds an element to the UnionArray indicating which typecode the + // new element should use. This *must* be followed up by an append to the + // appropriate child builder. + Append(arrow.UnionTypeCode) + // Mode returns what kind of Union is being built, either arrow.SparseMode + // or arrow.DenseMode + Mode() arrow.UnionMode + // Child returns the builder for the requested child index. + // If an invalid index is requested (e.g. <0 or >len(children)) + // then this will panic. 
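The Append contract described in the UnionBuilder interface above is the same for both modes: every Append(typeCode) must be followed by exactly one append to the corresponding child builder. A small sketch using the dense builder and on-the-fly children via AppendChild, in the spirit of the inferred-type builder tests further down (names and values are illustrative):

```go
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func main() {
	mem := memory.DefaultAllocator

	// Start with no children and register them on the fly via AppendChild.
	bldr := array.NewEmptyDenseUnionBuilder(mem)
	defer bldr.Release()

	i8Bldr := array.NewInt8Builder(mem)
	defer i8Bldr.Release()
	strBldr := array.NewStringBuilder(mem)
	defer strBldr.Release()

	i8Code := bldr.AppendChild(i8Bldr, "i8")   // returns the type code for the new child
	strCode := bldr.AppendChild(strBldr, "str")

	// Each Append(code) is paired with one append to that child's builder.
	bldr.Append(i8Code)
	i8Bldr.Append(42)
	bldr.Append(strCode)
	strBldr.Append("hello")
	bldr.Append(i8Code)
	i8Bldr.Append(-1)

	arr := bldr.NewArray().(*array.DenseUnion)
	defer arr.Release()
	fmt.Println(arr) // e.g. [{i8=42} {str=hello} {i8=-1}]
}
```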
+ Child(idx int) Builder +} + +type unionBuilder struct { + builder + + childFields []arrow.Field + codes []arrow.UnionTypeCode + mode arrow.UnionMode + + children []Builder + typeIDtoBuilder []Builder + typeIDtoChildID []int + // for all typeID < denseTypeID, typeIDtoBuilder[typeID] != nil + denseTypeID arrow.UnionTypeCode + typesBuilder *int8BufferBuilder +} + +func newUnionBuilder(mem memory.Allocator, children []Builder, typ arrow.UnionType) unionBuilder { + if children == nil { + children = make([]Builder, 0) + } + b := unionBuilder{ + builder: builder{refCount: 1, mem: mem}, + mode: typ.Mode(), + codes: typ.TypeCodes(), + children: children, + typeIDtoChildID: make([]int, typ.MaxTypeCode()+1), + typeIDtoBuilder: make([]Builder, typ.MaxTypeCode()+1), + childFields: make([]arrow.Field, len(children)), + typesBuilder: newInt8BufferBuilder(mem), + } + + b.typeIDtoChildID[0] = arrow.InvalidUnionChildID + for i := 1; i < len(b.typeIDtoChildID); i *= 2 { + copy(b.typeIDtoChildID[i:], b.typeIDtoChildID[:i]) + } + + debug.Assert(len(children) == len(typ.TypeCodes()), "mismatched typecodes and children") + debug.Assert(len(b.typeIDtoBuilder)-1 <= int(arrow.MaxUnionTypeCode), "too many typeids") + + copy(b.childFields, typ.Fields()) + for i, c := range children { + c.Retain() + typeID := typ.TypeCodes()[i] + b.typeIDtoChildID[typeID] = i + b.typeIDtoBuilder[typeID] = c + } + + return b +} + +func (b *unionBuilder) Child(idx int) Builder { + if idx < 0 || idx > len(b.children) { + panic("arrow/array: invalid child index for union builder") + } + return b.children[idx] +} + +func (b *unionBuilder) Mode() arrow.UnionMode { return b.mode } + +func (b *unionBuilder) reserve(elements int, resize func(int)) { + // union has no null bitmap, ever so we can skip that handling + if b.length+elements > b.capacity { + b.capacity = bitutil.NextPowerOf2(b.length + elements) + resize(b.capacity) + } +} + +func (b *unionBuilder) Release() { + debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") + + if atomic.AddInt64(&b.refCount, -1) == 0 { + for _, c := range b.children { + c.Release() + } + b.typesBuilder.Release() + } +} + +func (b *unionBuilder) Type() arrow.DataType { + fields := make([]arrow.Field, len(b.childFields)) + for i, f := range b.childFields { + fields[i] = f + fields[i].Type = b.children[i].Type() + } + + switch b.mode { + case arrow.SparseMode: + return arrow.SparseUnionOf(fields, b.codes) + case arrow.DenseMode: + return arrow.DenseUnionOf(fields, b.codes) + default: + panic("invalid union builder mode") + } +} + +func (b *unionBuilder) AppendChild(newChild Builder, fieldName string) arrow.UnionTypeCode { + newChild.Retain() + b.children = append(b.children, newChild) + newType := b.nextTypeID() + + b.typeIDtoChildID[newType] = len(b.children) - 1 + b.typeIDtoBuilder[newType] = newChild + b.childFields = append(b.childFields, arrow.Field{Name: fieldName, Nullable: true}) + b.codes = append(b.codes, newType) + + return newType +} + +func (b *unionBuilder) nextTypeID() arrow.UnionTypeCode { + // find typeID such that typeIDtoBuilder[typeID] == nil + // use that for the new child. 
Start searching at denseTypeID + // since typeIDtoBuilder is densely packed up at least to denseTypeID + for ; int(b.denseTypeID) < len(b.typeIDtoBuilder); b.denseTypeID++ { + if b.typeIDtoBuilder[b.denseTypeID] == nil { + id := b.denseTypeID + b.denseTypeID++ + return id + } + } + + debug.Assert(len(b.typeIDtoBuilder) < int(arrow.MaxUnionTypeCode), "too many children typeids") + // typeIDtoBuilder is already densely packed, so just append the new child + b.typeIDtoBuilder = append(b.typeIDtoBuilder, nil) + b.typeIDtoChildID = append(b.typeIDtoChildID, arrow.InvalidUnionChildID) + id := b.denseTypeID + b.denseTypeID++ + return id + +} + +func (b *unionBuilder) newData() *Data { + length := b.typesBuilder.Len() + typesBuffer := b.typesBuilder.Finish() + defer typesBuffer.Release() + childData := make([]arrow.ArrayData, len(b.children)) + for i, b := range b.children { + childData[i] = b.newData() + defer childData[i].Release() + } + + return NewData(b.Type(), length, []*memory.Buffer{nil, typesBuffer}, childData, 0, 0) +} + +// SparseUnionBuilder is used to build a Sparse Union array using the Append +// methods. You can also add new types to the union on the fly by using +// AppendChild. +// +// Keep in mind: All children of a SparseUnion should be the same length +// as the union itself. If you add new children with AppendChild, ensure +// that they have the correct number of preceding elements that have been +// added to the builder beforehand. +type SparseUnionBuilder struct { + unionBuilder +} + +// NewEmptySparseUnionBuilder is a helper to construct a SparseUnionBuilder +// without having to predefine the union types. It creates a builder with no +// children and AppendChild will have to be called before appending any +// elements to this builder. +func NewEmptySparseUnionBuilder(mem memory.Allocator) *SparseUnionBuilder { + return &SparseUnionBuilder{ + unionBuilder: newUnionBuilder(mem, nil, arrow.SparseUnionOf([]arrow.Field{}, []arrow.UnionTypeCode{})), + } +} + +// NewSparseUnionBuilder constructs a new SparseUnionBuilder with the provided +// children and type codes. Builders will be constructed for each child +// using the fields in typ +func NewSparseUnionBuilder(mem memory.Allocator, typ *arrow.SparseUnionType) *SparseUnionBuilder { + children := make([]Builder, len(typ.Fields())) + for i, f := range typ.Fields() { + children[i] = NewBuilder(mem, f.Type) + defer children[i].Release() + } + return NewSparseUnionBuilderWithBuilders(mem, typ, children) +} + +// NewSparseUnionWithBuilders returns a new SparseUnionBuilder using the +// provided type and builders. +func NewSparseUnionBuilderWithBuilders(mem memory.Allocator, typ *arrow.SparseUnionType, children []Builder) *SparseUnionBuilder { + return &SparseUnionBuilder{ + unionBuilder: newUnionBuilder(mem, children, typ), + } +} + +func (b *SparseUnionBuilder) Reserve(n int) { + b.reserve(n, b.Resize) +} + +func (b *SparseUnionBuilder) Resize(n int) { + b.typesBuilder.resize(n) +} + +// AppendNull will append a null to the first child and an empty value +// (implementation-defined) to the rest of the children. +func (b *SparseUnionBuilder) AppendNull() { + firstChildCode := b.codes[0] + b.typesBuilder.AppendValue(firstChildCode) + b.typeIDtoBuilder[firstChildCode].AppendNull() + for _, c := range b.codes[1:] { + b.typeIDtoBuilder[c].AppendEmptyValue() + } +} + +// AppendNulls is identical to calling AppendNull() n times, except +// it will pre-allocate with reserve for all the nulls beforehand. 
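As the comments above note, a sparse builder keeps every child the same length as the union, so AppendNull places a null in the first child and an empty value in every other child, and a manual Append(code) should likewise be paired with placeholders on the non-selected children. A brief illustrative sketch of that pattern (the type codes and values are assumptions, not taken from the tests):

```go
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func main() {
	mem := memory.DefaultAllocator

	ty := arrow.SparseUnionOf([]arrow.Field{
		{Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true},
		{Name: "str", Type: arrow.BinaryTypes.String, Nullable: true},
	}, []arrow.UnionTypeCode{0, 1})

	bldr := array.NewSparseUnionBuilder(mem, ty)
	defer bldr.Release()

	i8Bldr := bldr.Child(0).(*array.Int8Builder)
	strBldr := bldr.Child(1).(*array.StringBuilder)

	// A real value in the i8 child: the string child still needs a placeholder slot.
	bldr.Append(0)
	i8Bldr.Append(7)
	strBldr.AppendEmptyValue()

	// AppendNull handles the placeholders itself: null in child 0, empty value in child 1.
	bldr.AppendNull()

	arr := bldr.NewArray().(*array.SparseUnion)
	defer arr.Release()
	fmt.Println(arr.Len()) // 2 logical slots: a 7 from the i8 child, then a null
}
```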
+func (b *SparseUnionBuilder) AppendNulls(n int) { + firstChildCode := b.codes[0] + b.Reserve(n) + for _, c := range b.codes { + b.typeIDtoBuilder[c].Reserve(n) + } + for i := 0; i < n; i++ { + b.typesBuilder.AppendValue(firstChildCode) + b.typeIDtoBuilder[firstChildCode].AppendNull() + for _, c := range b.codes[1:] { + b.typeIDtoBuilder[c].AppendEmptyValue() + } + } +} + +// AppendEmptyValue appends an empty value (implementation defined) +// to each child, and appends the type of the first typecode to the typeid +// buffer. +func (b *SparseUnionBuilder) AppendEmptyValue() { + b.typesBuilder.AppendValue(b.codes[0]) + for _, c := range b.codes { + b.typeIDtoBuilder[c].AppendEmptyValue() + } +} + +// AppendEmptyValues is identical to calling AppendEmptyValue() n times, +// except it pre-allocates first so it is more efficient. +func (b *SparseUnionBuilder) AppendEmptyValues(n int) { + b.Reserve(n) + firstChildCode := b.codes[0] + for _, c := range b.codes { + b.typeIDtoBuilder[c].Reserve(n) + } + for i := 0; i < n; i++ { + b.typesBuilder.AppendValue(firstChildCode) + for _, c := range b.codes { + b.typeIDtoBuilder[c].AppendEmptyValue() + } + } +} + +// Append appends an element to the UnionArray and must be followed up +// by an append to the appropriate child builder. The parameter should +// be the type id of the child to which the next value will be appended. +// +// After appending to the corresponding child builder, all other child +// builders should have a null or empty value appended to them (although +// this is not enfoced and any value is theoretically allowed and will be +// ignored). +func (b *SparseUnionBuilder) Append(nextType arrow.UnionTypeCode) { + b.typesBuilder.AppendValue(nextType) +} + +func (b *SparseUnionBuilder) NewArray() arrow.Array { + return b.NewSparseUnionArray() +} + +func (b *SparseUnionBuilder) NewSparseUnionArray() (a *SparseUnion) { + data := b.newData() + a = NewSparseUnionData(data) + data.Release() + return +} + +func (b *SparseUnionBuilder) UnmarshalJSON(data []byte) (err error) { + dec := json.NewDecoder(bytes.NewReader(data)) + t, err := dec.Token() + if err != nil { + return err + } + + if delim, ok := t.(json.Delim); !ok || delim != '[' { + return fmt.Errorf("sparse union builder must unpack from json array, found %s", t) + } + return b.unmarshal(dec) +} + +func (b *SparseUnionBuilder) unmarshal(dec *json.Decoder) error { + for dec.More() { + if err := b.unmarshalOne(dec); err != nil { + return err + } + } + return nil +} + +func (b *SparseUnionBuilder) unmarshalOne(dec *json.Decoder) error { + t, err := dec.Token() + if err != nil { + return err + } + + switch t { + case json.Delim('['): + // should be [type_id, Value] + typeID, err := dec.Token() + if err != nil { + return err + } + + var typeCode int8 + + switch tid := typeID.(type) { + case json.Number: + id, err := tid.Int64() + if err != nil { + return err + } + typeCode = int8(id) + case float64: + if tid != float64(int64(tid)) { + return &json.UnmarshalTypeError{ + Offset: dec.InputOffset(), + Type: reflect.TypeOf(int8(0)), + Struct: fmt.Sprint(b.Type()), + Value: "float", + } + } + typeCode = int8(tid) + } + + childNum := b.typeIDtoChildID[typeCode] + if childNum == arrow.InvalidUnionChildID { + return &json.UnmarshalTypeError{ + Offset: dec.InputOffset(), + Value: "invalid type code", + } + } + + for i, c := range b.children { + if i != childNum { + c.AppendNull() + } + } + + b.Append(typeCode) + if err := b.children[childNum].unmarshalOne(dec); err != nil { + return err + } + + endArr, 
err := dec.Token() + if err != nil { + return err + } + + if endArr != json.Delim(']') { + return &json.UnmarshalTypeError{ + Offset: dec.InputOffset(), + Value: "union value array should have exactly 2 elements", + } + } + case nil: + b.AppendNull() + default: + return &json.UnmarshalTypeError{ + Offset: dec.InputOffset(), + Value: fmt.Sprint(t), + Struct: fmt.Sprint(b.Type()), + } + } + return nil +} + +// DenseUnionBuilder is used to build a Dense Union array using the Append +// methods. You can also add new types to the union on the fly by using +// AppendChild. +type DenseUnionBuilder struct { + unionBuilder + + offsetsBuilder *int32BufferBuilder +} + +// NewEmptyDenseUnionBuilder is a helper to construct a DenseUnionBuilder +// without having to predefine the union types. It creates a builder with no +// children and AppendChild will have to be called before appending any +// elements to this builder. +func NewEmptyDenseUnionBuilder(mem memory.Allocator) *DenseUnionBuilder { + return &DenseUnionBuilder{ + unionBuilder: newUnionBuilder(mem, nil, arrow.DenseUnionOf([]arrow.Field{}, []arrow.UnionTypeCode{})), + offsetsBuilder: newInt32BufferBuilder(mem), + } +} + +// NewDenseUnionBuilder constructs a new DenseUnionBuilder with the provided +// children and type codes. Builders will be constructed for each child +// using the fields in typ +func NewDenseUnionBuilder(mem memory.Allocator, typ *arrow.DenseUnionType) *DenseUnionBuilder { + children := make([]Builder, len(typ.Fields())) + for i, f := range typ.Fields() { + children[i] = NewBuilder(mem, f.Type) + defer children[i].Release() + } + return NewDenseUnionBuilderWithBuilders(mem, typ, children) +} + +// NewDenseUnionWithBuilders returns a new DenseUnionBuilder using the +// provided type and builders. +func NewDenseUnionBuilderWithBuilders(mem memory.Allocator, typ *arrow.DenseUnionType, children []Builder) *DenseUnionBuilder { + return &DenseUnionBuilder{ + unionBuilder: newUnionBuilder(mem, children, typ), + offsetsBuilder: newInt32BufferBuilder(mem), + } +} + +func (b *DenseUnionBuilder) Reserve(n int) { + b.reserve(n, b.Resize) +} + +func (b *DenseUnionBuilder) Resize(n int) { + b.typesBuilder.resize(n) + b.offsetsBuilder.resize(n * arrow.Int32SizeBytes) +} + +// AppendNull will only append a null value arbitrarily to the first child +// and use that offset for this element of the array. +func (b *DenseUnionBuilder) AppendNull() { + firstChildCode := b.codes[0] + childBuilder := b.typeIDtoBuilder[firstChildCode] + b.typesBuilder.AppendValue(firstChildCode) + b.offsetsBuilder.AppendValue(int32(childBuilder.Len())) + childBuilder.AppendNull() +} + +// AppendNulls will only append a single null arbitrarily to the first child +// and use the same offset multiple times to point to it. The result is that +// for a DenseUnion this is more efficient than calling AppendNull multiple +// times in a loop +func (b *DenseUnionBuilder) AppendNulls(n int) { + // only append 1 null to the child builder, use the same offset twice + firstChildCode := b.codes[0] + childBuilder := b.typeIDtoBuilder[firstChildCode] + b.Reserve(n) + for i := 0; i < n; i++ { + b.typesBuilder.AppendValue(firstChildCode) + b.offsetsBuilder.AppendValue(int32(childBuilder.Len())) + } + // only append a single null to the child builder, the offsets all refer to the same value + childBuilder.AppendNull() +} + +// AppendEmptyValue only appends an empty value arbitrarily to the first child, +// and then uses that offset to identify the value. 
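Because the dense builder also records an offset per slot, AppendNulls(n) above can get away with a single null in the first child that all n offsets point at, which is cheaper than n separate AppendNull calls. A short hedged sketch of that behaviour (the single-child union type here is purely illustrative):

```go
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func main() {
	mem := memory.DefaultAllocator

	ty := arrow.DenseUnionOf([]arrow.Field{
		{Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true},
	}, []arrow.UnionTypeCode{0})

	bldr := array.NewDenseUnionBuilder(mem, ty)
	defer bldr.Release()

	bldr.AppendNulls(3) // one null appended to the i8 child, three offsets referring to it

	arr := bldr.NewArray().(*array.DenseUnion)
	defer arr.Release()

	fmt.Println(arr.Len())          // 3 logical slots
	fmt.Println(arr.Field(0).Len()) // but only 1 physical value in the child
}
```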
+func (b *DenseUnionBuilder) AppendEmptyValue() { + firstChildCode := b.codes[0] + childBuilder := b.typeIDtoBuilder[firstChildCode] + b.typesBuilder.AppendValue(firstChildCode) + b.offsetsBuilder.AppendValue(int32(childBuilder.Len())) + childBuilder.AppendEmptyValue() +} + +// AppendEmptyValues, like AppendNulls, will only append a single empty value +// (implementation defined) to the first child arbitrarily, and then point +// at that value using the offsets n times. That makes this more efficient +// than calling AppendEmptyValue multiple times. +func (b *DenseUnionBuilder) AppendEmptyValues(n int) { + // only append 1 null to the child builder, use the same offset twice + firstChildCode := b.codes[0] + childBuilder := b.typeIDtoBuilder[firstChildCode] + b.Reserve(n) + for i := 0; i < n; i++ { + b.typesBuilder.AppendValue(firstChildCode) + b.offsetsBuilder.AppendValue(int32(childBuilder.Len())) + } + // only append a single empty value to the child builder, the offsets all + // refer to the same value + childBuilder.AppendEmptyValue() +} + +// Append appends the necessary offset and type code to the builder +// and must be followed up with an append to the appropriate child builder +func (b *DenseUnionBuilder) Append(nextType arrow.UnionTypeCode) { + b.typesBuilder.AppendValue(nextType) + bldr := b.typeIDtoBuilder[nextType] + if bldr.Len() == kMaxElems { + panic("a dense UnionArray cannot contain more than 2^31 - 1 elements from a single child") + } + + b.offsetsBuilder.AppendValue(int32(bldr.Len())) +} + +func (b *DenseUnionBuilder) Release() { + debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") + + if atomic.AddInt64(&b.refCount, -1) == 0 { + for _, c := range b.children { + c.Release() + } + b.typesBuilder.Release() + b.offsetsBuilder.Release() + } +} + +func (b *DenseUnionBuilder) newData() *Data { + data := b.unionBuilder.newData() + data.buffers = append(data.buffers, b.offsetsBuilder.Finish()) + return data +} + +func (b *DenseUnionBuilder) NewArray() arrow.Array { + return b.NewDenseUnionArray() +} + +func (b *DenseUnionBuilder) NewDenseUnionArray() (a *DenseUnion) { + data := b.newData() + a = NewDenseUnionData(data) + data.Release() + return +} + +func (b *DenseUnionBuilder) UnmarshalJSON(data []byte) (err error) { + dec := json.NewDecoder(bytes.NewReader(data)) + t, err := dec.Token() + if err != nil { + return err + } + + if delim, ok := t.(json.Delim); !ok || delim != '[' { + return fmt.Errorf("dense union builder must unpack from json array, found %s", t) + } + return b.unmarshal(dec) +} + +func (b *DenseUnionBuilder) unmarshal(dec *json.Decoder) error { + for dec.More() { + if err := b.unmarshalOne(dec); err != nil { + return err + } + } + return nil +} + +func (b *DenseUnionBuilder) unmarshalOne(dec *json.Decoder) error { + t, err := dec.Token() + if err != nil { + return err + } + + switch t { + case json.Delim('['): + // should be [type_id, Value] + typeID, err := dec.Token() + if err != nil { + return err + } + + var typeCode int8 + + switch tid := typeID.(type) { + case json.Number: + id, err := tid.Int64() + if err != nil { + return err + } + typeCode = int8(id) + case float64: + if tid != float64(int64(tid)) { + return &json.UnmarshalTypeError{ + Offset: dec.InputOffset(), + Type: reflect.TypeOf(int8(0)), + Struct: fmt.Sprint(b.Type()), + Value: "float", + } + } + typeCode = int8(tid) + } + + childNum := b.typeIDtoChildID[typeCode] + if childNum == arrow.InvalidUnionChildID { + return &json.UnmarshalTypeError{ + Offset: dec.InputOffset(), + 
Value: "invalid type code", + } + } + + b.Append(typeCode) + if err := b.children[childNum].unmarshalOne(dec); err != nil { + return err + } + + endArr, err := dec.Token() + if err != nil { + return err + } + + if endArr != json.Delim(']') { + return &json.UnmarshalTypeError{ + Offset: dec.InputOffset(), + Value: "union value array should have exactly 2 elements", + } + } + case nil: + b.AppendNull() + default: + return &json.UnmarshalTypeError{ + Offset: dec.InputOffset(), + Value: fmt.Sprint(t), + Struct: fmt.Sprint(b.Type()), + } + } + return nil +} + +var ( + _ arrow.Array = (*SparseUnion)(nil) + _ arrow.Array = (*DenseUnion)(nil) + _ Union = (*SparseUnion)(nil) + _ Union = (*DenseUnion)(nil) + _ Builder = (*SparseUnionBuilder)(nil) + _ Builder = (*DenseUnionBuilder)(nil) + _ UnionBuilder = (*SparseUnionBuilder)(nil) + _ UnionBuilder = (*DenseUnionBuilder)(nil) +) diff --git a/go/arrow/array/union_test.go b/go/arrow/array/union_test.go new file mode 100644 index 0000000000000..ca6122c0ae96f --- /dev/null +++ b/go/arrow/array/union_test.go @@ -0,0 +1,952 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package array_test + +import ( + "strings" + "testing" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" +) + +func uint8ArrFromSlice(ids ...uint8) arrow.Array { + data := array.NewData(arrow.PrimitiveTypes.Uint8, len(ids), + []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Uint8Traits.CastToBytes(ids))}, nil, 0, 0) + defer data.Release() + return array.MakeFromData(data) +} + +func int32ArrFromSlice(offsets ...int32) arrow.Array { + data := array.NewData(arrow.PrimitiveTypes.Int32, len(offsets), + []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(offsets))}, nil, 0, 0) + defer data.Release() + return array.MakeFromData(data) +} + +func TestUnionSliceEquals(t *testing.T) { + unionFields := []arrow.Field{ + {Name: "u0", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, + {Name: "u1", Type: arrow.PrimitiveTypes.Uint8, Nullable: true}, + } + + typeCodes := []arrow.UnionTypeCode{5, 10} + sparseType := arrow.SparseUnionOf(unionFields, typeCodes) + denseType := arrow.DenseUnionOf(unionFields, typeCodes) + + schema := arrow.NewSchema([]arrow.Field{ + {Name: "sparse", Type: sparseType, Nullable: true}, + {Name: "dense", Type: denseType, Nullable: true}, + }, nil) + + sparseChildren := make([]arrow.Array, 2) + denseChildren := make([]arrow.Array, 2) + + const length = 7 + + typeIDsBuffer := memory.NewBufferBytes(arrow.Uint8Traits.CastToBytes([]uint8{5, 10, 5, 5, 10, 10, 5})) + sparseChildren[0] = int32ArrFromSlice(0, 1, 2, 3, 4, 5, 6) + defer sparseChildren[0].Release() + sparseChildren[1] = uint8ArrFromSlice(10, 11, 12, 13, 14, 15, 16) + defer sparseChildren[1].Release() + + denseChildren[0] = int32ArrFromSlice(0, 2, 3, 7) + defer denseChildren[0].Release() + denseChildren[1] = uint8ArrFromSlice(11, 14, 15) + defer denseChildren[1].Release() + + offsetsBuffer := memory.NewBufferBytes(arrow.Int32Traits.CastToBytes([]int32{0, 0, 1, 2, 1, 2, 3})) + sparse := array.NewSparseUnion(sparseType, length, sparseChildren, typeIDsBuffer, 0) + dense := array.NewDenseUnion(denseType, length, denseChildren, typeIDsBuffer, offsetsBuffer, 0) + + defer sparse.Release() + defer dense.Release() + + batch := array.NewRecord(schema, []arrow.Array{sparse, dense}, -1) + defer batch.Release() + + checkUnion := func(arr arrow.Array) { + size := arr.Len() + slice := array.NewSlice(arr, 2, int64(size)) + defer slice.Release() + assert.EqualValues(t, size-2, slice.Len()) + + slice2 := array.NewSlice(arr, 2, int64(arr.Len())) + defer slice2.Release() + assert.EqualValues(t, size-2, slice2.Len()) + + assert.True(t, array.Equal(slice, slice2)) + assert.True(t, array.SliceEqual(arr, 2, int64(arr.Len()), slice, 0, int64(slice.Len()))) + + // chain slices + slice2 = array.NewSlice(arr, 1, int64(arr.Len())) + defer slice2.Release() + slice2 = array.NewSlice(slice2, 1, int64(slice2.Len())) + defer slice2.Release() + assert.True(t, array.Equal(slice, slice2)) + + slice, slice2 = array.NewSlice(arr, 1, 6), array.NewSlice(arr, 1, 6) + defer slice.Release() + defer slice2.Release() + assert.EqualValues(t, 5, slice.Len()) + + assert.True(t, array.Equal(slice, slice2)) + assert.True(t, array.SliceEqual(arr, 1, 6, slice, 0, 5)) + } + + checkUnion(batch.Column(0)) + checkUnion(batch.Column(1)) +} + +func TestSparseUnionGetFlattenedField(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(t, 0) + + ty := 
arrow.SparseUnionOf([]arrow.Field{ + {Name: "ints", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, + {Name: "strs", Type: arrow.BinaryTypes.String, Nullable: true}, + }, []arrow.UnionTypeCode{2, 7}) + ints, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[0, 1, 2, 3]`)) + defer ints.Release() + strs, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["a", null, "c", "d"]`)) + defer strs.Release() + idsArr, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[2, 7, 2, 7]`)) + defer idsArr.Release() + ids := idsArr.Data().Buffers()[1] + + const length = 4 + + t.Run("flattened", func(t *testing.T) { + scoped := memory.NewCheckedAllocatorScope(mem) + defer scoped.CheckSize(t) + + arr := array.NewSparseUnion(ty, length, []arrow.Array{ints, strs}, ids, 0) + defer arr.Release() + + flattened, err := arr.GetFlattenedField(mem, 0) + assert.NoError(t, err) + defer flattened.Release() + expected, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[0, null, 2, null]`)) + defer expected.Release() + + assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) + + flattened, err = arr.GetFlattenedField(mem, 1) + assert.NoError(t, err) + defer flattened.Release() + expected, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[null, null, null, "d"]`)) + defer expected.Release() + + assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) + + sliced := array.NewSlice(arr, 1, 3).(*array.SparseUnion) + defer sliced.Release() + + flattened, err = sliced.GetFlattenedField(mem, 0) + assert.NoError(t, err) + defer flattened.Release() + expected, _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[null, 2]`)) + defer expected.Release() + + assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) + + flattened, err = sliced.GetFlattenedField(mem, 1) + assert.NoError(t, err) + defer flattened.Release() + expected, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[null, null]`)) + defer expected.Release() + + assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) + + _, err = arr.GetFlattenedField(mem, -1) + assert.Error(t, err) + _, err = arr.GetFlattenedField(mem, 2) + assert.Error(t, err) + }) + + t.Run("offset children", func(t *testing.T) { + scoped := memory.NewCheckedAllocatorScope(mem) + defer scoped.CheckSize(t) + + strSlice, intSlice := array.NewSlice(strs, 1, 3), array.NewSlice(ints, 1, 3) + defer strSlice.Release() + defer intSlice.Release() + + arr := array.NewSparseUnion(ty, length-2, []arrow.Array{intSlice, strSlice}, ids, 0) + defer arr.Release() + + flattened, err := arr.GetFlattenedField(mem, 0) + assert.NoError(t, err) + defer flattened.Release() + expected, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[1, null]`)) + defer expected.Release() + + assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) + + flattened, err = arr.GetFlattenedField(mem, 1) + assert.NoError(t, err) + defer flattened.Release() + expected, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[null, "c"]`)) + defer expected.Release() + + assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) + + sliced := array.NewSlice(arr, 1, 2).(*array.SparseUnion) + defer sliced.Release() + + 
flattened, err = sliced.GetFlattenedField(mem, 0) + assert.NoError(t, err) + defer flattened.Release() + expected, _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[null]`)) + defer expected.Release() + + assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) + + flattened, err = sliced.GetFlattenedField(mem, 1) + assert.NoError(t, err) + defer flattened.Release() + expected, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["c"]`)) + defer expected.Release() + + assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) + }) + + t.Run("empty flattened", func(t *testing.T) { + scoped := memory.NewCheckedAllocatorScope(mem) + defer scoped.CheckSize(t) + + strSlice, intSlice := array.NewSlice(strs, length, length), array.NewSlice(ints, length, length) + defer strSlice.Release() + defer intSlice.Release() + + arr := array.NewSparseUnion(ty, 0, []arrow.Array{intSlice, strSlice}, ids, 0) + defer arr.Release() + + flattened, err := arr.GetFlattenedField(mem, 0) + assert.NoError(t, err) + defer flattened.Release() + expected, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[]`)) + defer expected.Release() + + assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) + + flattened, err = arr.GetFlattenedField(mem, 1) + assert.NoError(t, err) + defer flattened.Release() + expected, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[]`)) + defer expected.Release() + + assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) + }) +} + +func TestSparseUnionValidate(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(t, 0) + + a, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[4, 5]`)) + defer a.Release() + dt := arrow.SparseUnionOf([]arrow.Field{{Name: "a", Type: arrow.PrimitiveTypes.Int32, Nullable: true}}, []arrow.UnionTypeCode{0}) + children := []arrow.Array{a} + + typeIDsArr, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[0, 0, 0]`)) + defer typeIDsArr.Release() + typeIDs := typeIDsArr.Data().Buffers()[1] + + arr := array.NewSparseUnion(dt, 2, children, typeIDs, 0) + assert.NoError(t, arr.ValidateFull()) + arr.Release() + + arr = array.NewSparseUnion(dt, 1, children, typeIDs, 1) + assert.NoError(t, arr.ValidateFull()) + arr.Release() + + arr = array.NewSparseUnion(dt, 0, children, typeIDs, 2) + assert.NoError(t, arr.ValidateFull()) + arr.Release() + + // length + offset < child length but that's ok! + arr = array.NewSparseUnion(dt, 1, children, typeIDs, 0) + assert.NoError(t, arr.ValidateFull()) + arr.Release() + + // length + offset > child length! BAD! 
+ assert.Panics(t, func() { + arr = array.NewSparseUnion(dt, 1, children, typeIDs, 2) + }) + + // offset > child length + assert.Panics(t, func() { + arr = array.NewSparseUnion(dt, 0, children, typeIDs, 3) + }) +} + +type UnionFactorySuite struct { + suite.Suite + + mem *memory.CheckedAllocator + codes []arrow.UnionTypeCode + typeIDs arrow.Array + logicalTypeIDs arrow.Array + invalidTypeIDs arrow.Array + invalidTypeIDs2 arrow.Array +} + +func (s *UnionFactorySuite) typeidsFromSlice(ids ...int8) arrow.Array { + data := array.NewData(arrow.PrimitiveTypes.Int8, len(ids), + []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Int8Traits.CastToBytes(ids))}, nil, 0, 0) + defer data.Release() + return array.MakeFromData(data) +} + +func (s *UnionFactorySuite) offsetsFromSlice(offsets ...int32) arrow.Array { + data := array.NewData(arrow.PrimitiveTypes.Int32, len(offsets), + []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(offsets))}, nil, 0, 0) + defer data.Release() + return array.MakeFromData(data) +} + +func (s *UnionFactorySuite) SetupTest() { + s.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) + s.codes = []arrow.UnionTypeCode{1, 2, 4, 127} + s.typeIDs = s.typeidsFromSlice(0, 1, 2, 0, 1, 3, 2, 0, 2, 1) + s.logicalTypeIDs = s.typeidsFromSlice(1, 2, 4, 1, 2, 127, 4, 1, 4, 2) + s.invalidTypeIDs = s.typeidsFromSlice(1, 2, 4, 1, -2, 127, 4, 1, 4, 2) + s.invalidTypeIDs2 = s.typeidsFromSlice(1, 2, 4, 1, 3, 127, 4, 1, 4, 2) +} + +func (s *UnionFactorySuite) TearDownTest() { + s.typeIDs.Release() + s.logicalTypeIDs.Release() + s.invalidTypeIDs.Release() + s.invalidTypeIDs2.Release() + s.mem.AssertSize(s.T(), 0) +} + +func (s *UnionFactorySuite) checkFields(arr array.Union, fields []string) { + ty := arr.DataType().(arrow.UnionType) + s.Len(ty.Fields(), len(fields)) + for i, f := range ty.Fields() { + s.Equal(fields[i], f.Name) + } +} + +func (s *UnionFactorySuite) checkCodes(arr array.Union, codes []arrow.UnionTypeCode) { + ty := arr.DataType().(arrow.UnionType) + s.Equal(codes, ty.TypeCodes()) +} + +func (s *UnionFactorySuite) checkUnion(arr array.Union, mode arrow.UnionMode, fields []string, codes []arrow.UnionTypeCode) { + s.Equal(mode, arr.Mode()) + s.checkFields(arr, fields) + s.checkCodes(arr, codes) + typeIDs := s.typeIDs.(*array.Int8) + for i := 0; i < typeIDs.Len(); i++ { + s.EqualValues(typeIDs.Value(i), arr.ChildID(i)) + } + s.Nil(arr.Field(-1)) + s.Nil(arr.Field(typeIDs.Len())) +} + +func (s *UnionFactorySuite) TestMakeDenseUnions() { + // typeIDs: {0, 1, 2, 0, 1, 3, 2, 0, 2, 1} + offsets := s.offsetsFromSlice(0, 0, 0, 1, 1, 0, 1, 2, 1, 2) + defer offsets.Release() + + children := make([]arrow.Array, 4) + children[0], _, _ = array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc", "def", "xyz"]`)) + children[1], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Uint8, strings.NewReader(`[10, 20, 30]`)) + children[2], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[1.618, 2.718, 3.142]`)) + children[3], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[-12]`)) + for _, c := range children { + defer c.Release() + } + + fieldNames := []string{"str", "int1", "real", "int2"} + + s.Run("without fields and codes", func() { + result, err := array.NewDenseUnionFromArrays(s.typeIDs, offsets, children) + s.NoError(err) + defer result.Release() + s.NoError(result.ValidateFull()) + s.checkUnion(result, arrow.DenseMode, []string{"0", "1", "2", "3"}, []arrow.UnionTypeCode{0, 1, 2, 3}) + }) + + 
s.Run("with fields", func() { + _, err := array.NewDenseUnionFromArraysWithFields(s.typeIDs, offsets, children, []string{"one"}) + s.Error(err) + result, err := array.NewDenseUnionFromArraysWithFields(s.typeIDs, offsets, children, fieldNames) + s.NoError(err) + defer result.Release() + s.NoError(result.ValidateFull()) + s.checkUnion(result, arrow.DenseMode, fieldNames, []arrow.UnionTypeCode{0, 1, 2, 3}) + }) + + s.Run("with codes", func() { + _, err := array.NewDenseUnionFromArrays(s.logicalTypeIDs, offsets, children, 0) + s.Error(err) + result, err := array.NewDenseUnionFromArrays(s.logicalTypeIDs, offsets, children, s.codes...) + s.NoError(err) + defer result.Release() + s.NoError(result.ValidateFull()) + s.checkUnion(result, arrow.DenseMode, []string{"0", "1", "2", "3"}, s.codes) + }) + + s.Run("with fields and codes", func() { + _, err := array.NewDenseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, offsets, children, []string{"one"}, s.codes) + s.Error(err) + result, err := array.NewDenseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, offsets, children, fieldNames, s.codes) + s.NoError(err) + defer result.Release() + s.NoError(result.ValidateFull()) + s.checkUnion(result, arrow.DenseMode, fieldNames, s.codes) + }) + + s.Run("invalid type codes", func() { + result, err := array.NewDenseUnionFromArrays(s.invalidTypeIDs, offsets, children, s.codes...) + s.NoError(err) + defer result.Release() + s.Error(result.ValidateFull()) + result, err = array.NewDenseUnionFromArrays(s.invalidTypeIDs2, offsets, children, s.codes...) + s.NoError(err) + defer result.Release() + s.Error(result.ValidateFull()) + }) + + s.Run("invalid offsets", func() { + // offset out of bounds at index 5 + invalidOffsets := s.offsetsFromSlice(0, 0, 0, 1, 1, 1, 1, 2, 1, 2) + defer invalidOffsets.Release() + result, err := array.NewDenseUnionFromArrays(s.typeIDs, invalidOffsets, children) + s.NoError(err) + defer result.Release() + s.Error(result.ValidateFull()) + + // negative offset at index 5 + invalidOffsets = s.offsetsFromSlice(0, 0, 0, 1, 1, -1, 1, 2, 1, 2) + defer invalidOffsets.Release() + result, err = array.NewDenseUnionFromArrays(s.typeIDs, invalidOffsets, children) + s.NoError(err) + defer result.Release() + s.Error(result.ValidateFull()) + + // non-monotonic offset at index 3 + invalidOffsets = s.offsetsFromSlice(1, 0, 0, 0, 1, 0, 1, 2, 1, 2) + defer invalidOffsets.Release() + result, err = array.NewDenseUnionFromArrays(s.typeIDs, invalidOffsets, children) + s.NoError(err) + defer result.Release() + s.Error(result.ValidateFull()) + }) +} + +func (s *UnionFactorySuite) TestMakeSparse() { + children := make([]arrow.Array, 4) + children[0], _, _ = array.FromJSON(s.mem, arrow.BinaryTypes.String, + strings.NewReader(`["abc", "", "", "def", "", "", "", "xyz", "", ""]`)) + children[1], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Uint8, + strings.NewReader(`[0, 10, 0, 0, 20, 0, 0, 0, 0, 30]`)) + children[2], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, + strings.NewReader(`[0.0, 0.0, 1.618, 0.0, 0.0, 0.0, 2.718, 0.0, 3.142, 0.0]`)) + children[3], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, + strings.NewReader(`[0, 0, 0, 0, 0, -12, 0, 0, 0, 0]`)) + for _, c := range children { + defer c.Release() + } + + fieldNames := []string{"str", "int1", "real", "int2"} + + s.Run("without fields and codes", func() { + result, err := array.NewSparseUnionFromArrays(s.typeIDs, children) + s.NoError(err) + defer result.Release() + s.NoError(result.ValidateFull()) + s.checkUnion(result, arrow.SparseMode, 
[]string{"0", "1", "2", "3"}, []arrow.UnionTypeCode{0, 1, 2, 3}) + }) + + s.Run("with fields", func() { + _, err := array.NewSparseUnionFromArraysWithFields(s.typeIDs, children, []string{"one"}) + s.Error(err) + result, err := array.NewSparseUnionFromArraysWithFields(s.typeIDs, children, fieldNames) + s.NoError(err) + defer result.Release() + s.NoError(result.ValidateFull()) + s.checkUnion(result, arrow.SparseMode, fieldNames, []arrow.UnionTypeCode{0, 1, 2, 3}) + }) + + s.Run("with codes", func() { + _, err := array.NewSparseUnionFromArrays(s.logicalTypeIDs, children, 0) + s.Error(err) + result, err := array.NewSparseUnionFromArrays(s.logicalTypeIDs, children, s.codes...) + s.NoError(err) + defer result.Release() + s.NoError(result.ValidateFull()) + s.checkUnion(result, arrow.SparseMode, []string{"0", "1", "2", "3"}, s.codes) + }) + + s.Run("with fields and codes", func() { + _, err := array.NewSparseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, children, []string{"one"}, s.codes) + s.Error(err) + result, err := array.NewSparseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, children, fieldNames, s.codes) + s.NoError(err) + defer result.Release() + s.NoError(result.ValidateFull()) + s.checkUnion(result, arrow.SparseMode, fieldNames, s.codes) + }) + + s.Run("invalid type codes", func() { + result, err := array.NewSparseUnionFromArrays(s.invalidTypeIDs, children, s.codes...) + s.NoError(err) + defer result.Release() + s.Error(result.ValidateFull()) + result, err = array.NewSparseUnionFromArrays(s.invalidTypeIDs2, children, s.codes...) + s.NoError(err) + defer result.Release() + s.Error(result.ValidateFull()) + }) + + s.Run("invalid child length", func() { + children[3], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, + strings.NewReader(`[0, 0, 0, 0, 0, -12, 0, 0, 0]`)) + defer children[3].Release() + + _, err := array.NewSparseUnionFromArrays(s.typeIDs, children) + s.Error(err) + }) +} + +type UnionBuilderSuite struct { + suite.Suite + + I8 arrow.UnionTypeCode + STR arrow.UnionTypeCode + DBL arrow.UnionTypeCode + + mem *memory.CheckedAllocator + expectedTypes []arrow.UnionTypeCode + expectedTypesArr arrow.Array + i8Bldr *array.Int8Builder + strBldr *array.StringBuilder + dblBldr *array.Float64Builder + unionBldr array.UnionBuilder + actual array.Union +} + +func (s *UnionBuilderSuite) SetupTest() { + s.I8, s.STR, s.DBL = 8, 13, 7 + + s.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) + s.expectedTypes = make([]arrow.UnionTypeCode, 0) + + s.i8Bldr = array.NewInt8Builder(s.mem) + s.strBldr = array.NewStringBuilder(s.mem) + s.dblBldr = array.NewFloat64Builder(s.mem) +} + +func (s *UnionBuilderSuite) TearDownTest() { + if s.expectedTypesArr != nil { + s.expectedTypesArr.Release() + s.expectedTypesArr = nil + } + s.i8Bldr.Release() + s.strBldr.Release() + s.dblBldr.Release() + if s.actual != nil { + s.actual.Release() + s.actual = nil + } + + s.mem.AssertSize(s.T(), 0) +} + +func (s *UnionBuilderSuite) createExpectedTypesArr() { + data := array.NewData(arrow.PrimitiveTypes.Int8, len(s.expectedTypes), + []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Int8Traits.CastToBytes(s.expectedTypes))}, nil, 0, 0) + defer data.Release() + s.expectedTypesArr = array.MakeFromData(data) +} + +func (s *UnionBuilderSuite) appendInt(i int8) { + s.expectedTypes = append(s.expectedTypes, s.I8) + s.unionBldr.Append(s.I8) + s.i8Bldr.Append(i) + if s.unionBldr.Mode() == arrow.SparseMode { + s.strBldr.AppendEmptyValue() + s.dblBldr.AppendEmptyValue() + } +} + +func (s *UnionBuilderSuite) 
appendString(str string) { + s.expectedTypes = append(s.expectedTypes, s.STR) + s.unionBldr.Append(s.STR) + s.strBldr.Append(str) + if s.unionBldr.Mode() == arrow.SparseMode { + s.i8Bldr.AppendEmptyValue() + s.dblBldr.AppendEmptyValue() + } +} + +func (s *UnionBuilderSuite) appendDbl(dbl float64) { + s.expectedTypes = append(s.expectedTypes, s.DBL) + s.unionBldr.Append(s.DBL) + s.dblBldr.Append(dbl) + if s.unionBldr.Mode() == arrow.SparseMode { + s.strBldr.AppendEmptyValue() + s.i8Bldr.AppendEmptyValue() + } +} + +func (s *UnionBuilderSuite) appendBasics() { + s.appendInt(33) + s.appendString("abc") + s.appendDbl(1.0) + s.appendDbl(-1.0) + s.appendString("") + s.appendInt(10) + s.appendString("def") + s.appendInt(-10) + s.appendDbl(0.5) + + s.actual = s.unionBldr.NewArray().(array.Union) + s.NoError(s.actual.ValidateFull()) + s.createExpectedTypesArr() +} + +func (s *UnionBuilderSuite) appendNullsAndEmptyValues() { + s.appendString("abc") + s.unionBldr.AppendNull() + s.unionBldr.AppendEmptyValue() + s.expectedTypes = append(s.expectedTypes, s.I8, s.I8, s.I8) + s.appendInt(42) + s.unionBldr.AppendNulls(2) + s.unionBldr.AppendEmptyValues(2) + s.expectedTypes = append(s.expectedTypes, s.I8, s.I8, s.I8) + + s.actual = s.unionBldr.NewArray().(array.Union) + s.NoError(s.actual.ValidateFull()) + s.createExpectedTypesArr() +} + +func (s *UnionBuilderSuite) appendInferred() { + s.I8 = s.unionBldr.AppendChild(s.i8Bldr, "i8") + s.EqualValues(0, s.I8) + s.appendInt(33) + s.appendInt(10) + + s.STR = s.unionBldr.AppendChild(s.strBldr, "str") + s.EqualValues(1, s.STR) + s.appendString("abc") + s.appendString("") + s.appendString("def") + s.appendInt(-10) + + s.DBL = s.unionBldr.AppendChild(s.dblBldr, "dbl") + s.EqualValues(2, s.DBL) + s.appendDbl(1.0) + s.appendDbl(-1.0) + s.appendDbl(0.5) + + s.actual = s.unionBldr.NewArray().(array.Union) + s.NoError(s.actual.ValidateFull()) + s.createExpectedTypesArr() + + s.EqualValues(0, s.I8) + s.EqualValues(1, s.STR) + s.EqualValues(2, s.DBL) +} + +func (s *UnionBuilderSuite) appendListOfInferred(utyp arrow.UnionType) *array.List { + listBldr := array.NewListBuilder(s.mem, utyp) + defer listBldr.Release() + + s.unionBldr = listBldr.ValueBuilder().(array.UnionBuilder) + + listBldr.Append(true) + s.I8 = s.unionBldr.AppendChild(s.i8Bldr, "i8") + s.EqualValues(0, s.I8) + s.appendInt(10) + + listBldr.Append(true) + s.STR = s.unionBldr.AppendChild(s.strBldr, "str") + s.EqualValues(1, s.STR) + s.appendString("abc") + s.appendInt(-10) + + listBldr.Append(true) + s.DBL = s.unionBldr.AppendChild(s.dblBldr, "dbl") + s.EqualValues(2, s.DBL) + s.appendDbl(0.5) + + s.createExpectedTypesArr() + return listBldr.NewListArray() +} + +func (s *UnionBuilderSuite) assertArraysEqual(expected, actual arrow.Array) { + s.Truef(array.Equal(expected, actual), "expected: %s, got: %s", expected, actual) +} + +func (s *UnionBuilderSuite) TestDenseUnionBasics() { + s.unionBldr = array.NewDenseUnionBuilderWithBuilders(s.mem, + arrow.DenseUnionOf([]arrow.Field{ + {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, + {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "dbl", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, + }, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}), + []array.Builder{s.i8Bldr, s.strBldr, s.dblBldr}) + defer s.unionBldr.Release() + + s.appendBasics() + + expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[33, 10, -10]`)) + expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, 
strings.NewReader(`["abc", "", "def"]`)) + expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[1.0, -1.0, 0.5]`)) + expectedOffsets, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[0, 0, 0, 1, 1, 1, 2, 2, 2]`)) + + defer func() { + expectedI8.Release() + expectedStr.Release() + expectedDbl.Release() + expectedOffsets.Release() + }() + + expected, err := array.NewDenseUnionFromArraysWithFieldCodes(s.expectedTypesArr, + expectedOffsets, + []arrow.Array{expectedI8, expectedStr, expectedDbl}, + []string{"i8", "str", "dbl"}, + []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) + s.NoError(err) + defer expected.Release() + + s.Equal(expected.DataType().String(), s.actual.DataType().String()) + s.assertArraysEqual(expected, s.actual) +} + +func (s *UnionBuilderSuite) TestDenseBuilderNullsAndEmpty() { + s.unionBldr = array.NewDenseUnionBuilderWithBuilders(s.mem, + arrow.DenseUnionOf([]arrow.Field{ + {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, + {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "dbl", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, + }, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}), + []array.Builder{s.i8Bldr, s.strBldr, s.dblBldr}) + defer s.unionBldr.Release() + + s.appendNullsAndEmptyValues() + + // four null / empty values (the latter implementation-defined) appended to I8 + expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[null, 0, 42, null, 0]`)) + expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc"]`)) + expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[]`)) + expectedOffsets, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[0, 0, 1, 2, 3, 3, 4, 4]`)) + + defer func() { + expectedI8.Release() + expectedStr.Release() + expectedDbl.Release() + expectedOffsets.Release() + }() + + expected, err := array.NewDenseUnionFromArraysWithFieldCodes(s.expectedTypesArr, + expectedOffsets, + []arrow.Array{expectedI8, expectedStr, expectedDbl}, + []string{"i8", "str", "dbl"}, + []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) + s.NoError(err) + defer expected.Release() + + s.Equal(expected.DataType().String(), s.actual.DataType().String()) + s.assertArraysEqual(expected, s.actual) + + // physical arrays must be as expected + s.assertArraysEqual(expectedI8, s.actual.Field(0)) + s.assertArraysEqual(expectedStr, s.actual.Field(1)) + s.assertArraysEqual(expectedDbl, s.actual.Field(2)) +} + +func (s *UnionBuilderSuite) TestDenseUnionInferredTyped() { + s.unionBldr = array.NewEmptyDenseUnionBuilder(s.mem) + defer s.unionBldr.Release() + + s.appendInferred() + + expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[33, 10, -10]`)) + expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc", "", "def"]`)) + expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[1.0, -1.0, 0.5]`)) + expectedOffsets, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[0, 1, 0, 1, 2, 2, 0, 1, 2]`)) + + defer func() { + expectedI8.Release() + expectedStr.Release() + expectedDbl.Release() + expectedOffsets.Release() + }() + + expected, err := array.NewDenseUnionFromArraysWithFieldCodes(s.expectedTypesArr, + expectedOffsets, + []arrow.Array{expectedI8, expectedStr, expectedDbl}, + []string{"i8", "str", "dbl"}, + 
[]arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) + s.NoError(err) + defer expected.Release() + + s.Equal(expected.DataType().String(), s.actual.DataType().String()) + s.assertArraysEqual(expected, s.actual) +} + +func (s *UnionBuilderSuite) TestDenseUnionListOfInferredType() { + actual := s.appendListOfInferred(arrow.DenseUnionOf([]arrow.Field{}, []arrow.UnionTypeCode{})) + defer actual.Release() + + expectedType := arrow.ListOf(arrow.DenseUnionOf( + []arrow.Field{ + {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, + {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "dbl", Type: arrow.PrimitiveTypes.Float64, Nullable: true}}, + []arrow.UnionTypeCode{s.I8, s.STR, s.DBL})) + s.Equal(expectedType.String(), actual.DataType().String()) +} + +func (s *UnionBuilderSuite) TestSparseUnionBasics() { + s.unionBldr = array.NewSparseUnionBuilderWithBuilders(s.mem, + arrow.SparseUnionOf([]arrow.Field{ + {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, + {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "dbl", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, + }, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}), + []array.Builder{s.i8Bldr, s.strBldr, s.dblBldr}) + defer s.unionBldr.Release() + + s.appendBasics() + + expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, + strings.NewReader(`[33, null, null, null, null, 10, null, -10, null]`)) + expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, + strings.NewReader(`[null, "abc", null, null, "", null, "def", null, null]`)) + expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, + strings.NewReader(`[null, null, 1.0, -1.0, null, null, null, null, 0.5]`)) + + defer func() { + expectedI8.Release() + expectedStr.Release() + expectedDbl.Release() + }() + + expected, err := array.NewSparseUnionFromArraysWithFieldCodes(s.expectedTypesArr, + []arrow.Array{expectedI8, expectedStr, expectedDbl}, + []string{"i8", "str", "dbl"}, + []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) + s.NoError(err) + defer expected.Release() + + s.Equal(expected.DataType().String(), s.actual.DataType().String()) + s.assertArraysEqual(expected, s.actual) +} + +func (s *UnionBuilderSuite) TestSparseBuilderNullsAndEmpty() { + s.unionBldr = array.NewSparseUnionBuilderWithBuilders(s.mem, + arrow.SparseUnionOf([]arrow.Field{ + {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, + {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "dbl", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, + }, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}), + []array.Builder{s.i8Bldr, s.strBldr, s.dblBldr}) + defer s.unionBldr.Release() + + s.appendNullsAndEmptyValues() + + // "abc", null, 0, 42, null, null, 0, 0 + // getting 0 for empty values is implementation-defined + expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, + strings.NewReader(`[0, null, 0, 42, null, null, 0, 0]`)) + expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, + strings.NewReader(`["abc", "", "", "", "", "", "", ""]`)) + expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, + strings.NewReader(`[0, 0, 0, 0, 0, 0, 0, 0]`)) + + defer func() { + expectedI8.Release() + expectedStr.Release() + expectedDbl.Release() + }() + + expected, err := array.NewSparseUnionFromArraysWithFieldCodes(s.expectedTypesArr, + []arrow.Array{expectedI8, expectedStr, expectedDbl}, + []string{"i8", "str", "dbl"}, + []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) + s.NoError(err) + 
defer expected.Release() + + s.Equal(expected.DataType().String(), s.actual.DataType().String()) + s.assertArraysEqual(expected, s.actual) + + // physical arrays must be as expected + s.assertArraysEqual(expectedI8, s.actual.Field(0)) + s.assertArraysEqual(expectedStr, s.actual.Field(1)) + s.assertArraysEqual(expectedDbl, s.actual.Field(2)) +} + +func (s *UnionBuilderSuite) TestSparseUnionInferredType() { + s.unionBldr = array.NewEmptySparseUnionBuilder(s.mem) + defer s.unionBldr.Release() + + s.appendInferred() + + expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, + strings.NewReader(`[33, 10, null, null, null, -10, null, null, null]`)) + expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, + strings.NewReader(`[null, null, "abc", "", "def", null, null, null, null]`)) + expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, + strings.NewReader(`[null, null, null, null, null, null,1.0, -1.0, 0.5]`)) + + defer func() { + expectedI8.Release() + expectedStr.Release() + expectedDbl.Release() + }() + + expected, err := array.NewSparseUnionFromArraysWithFieldCodes(s.expectedTypesArr, + []arrow.Array{expectedI8, expectedStr, expectedDbl}, + []string{"i8", "str", "dbl"}, + []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) + s.NoError(err) + defer expected.Release() + + s.Equal(expected.DataType().String(), s.actual.DataType().String()) + s.assertArraysEqual(expected, s.actual) +} + +func (s *UnionBuilderSuite) TestSparseUnionStructWithUnion() { + bldr := array.NewStructBuilder(s.mem, arrow.StructOf(arrow.Field{Name: "u", Type: arrow.SparseUnionFromArrays(nil, nil, nil)})) + defer bldr.Release() + + unionBldr := bldr.FieldBuilder(0).(array.UnionBuilder) + int32Bldr := array.NewInt32Builder(s.mem) + defer int32Bldr.Release() + + s.EqualValues(0, unionBldr.AppendChild(int32Bldr, "i")) + expectedType := arrow.StructOf(arrow.Field{Name: "u", + Type: arrow.SparseUnionOf([]arrow.Field{{Name: "i", Type: arrow.PrimitiveTypes.Int32, Nullable: true}}, []arrow.UnionTypeCode{0})}) + s.Truef(arrow.TypeEqual(expectedType, bldr.Type()), "expected: %s, got: %s", expectedType, bldr.Type()) +} + +func TestUnions(t *testing.T) { + suite.Run(t, new(UnionFactorySuite)) + suite.Run(t, new(UnionBuilderSuite)) +} diff --git a/go/arrow/array/util.go b/go/arrow/array/util.go index 3945376312d9b..b739704889ebb 100644 --- a/go/arrow/array/util.go +++ b/go/arrow/array/util.go @@ -22,10 +22,10 @@ import ( "io" "strings" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/hashing" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/hashing" "github.com/goccy/go-json" ) @@ -299,3 +299,156 @@ func DictArrayFromJSON(mem memory.Allocator, dt *arrow.DictionaryType, indicesJS return NewDictionaryArray(dt, indices, dict), nil } + +func getMaxBufferLen(dt arrow.DataType, length int) int { + bufferLen := int(bitutil.BytesForBits(int64(length))) + + maxOf := func(bl int) int { + if bl > bufferLen { + return bl + } + return bufferLen + } + + switch dt := dt.(type) { + case *arrow.DictionaryType: + bufferLen = maxOf(getMaxBufferLen(dt.ValueType, length)) + return maxOf(getMaxBufferLen(dt.IndexType, length)) + case *arrow.FixedSizeBinaryType: + return maxOf(dt.ByteWidth * length) + case arrow.FixedWidthDataType: + return 
maxOf(int(bitutil.BytesForBits(int64(dt.BitWidth()))) * length) + case *arrow.StructType: + for _, f := range dt.Fields() { + bufferLen = maxOf(getMaxBufferLen(f.Type, length)) + } + return bufferLen + case *arrow.SparseUnionType: + // type codes + bufferLen = maxOf(length) + // creates children of the same length of the union + for _, f := range dt.Fields() { + bufferLen = maxOf(getMaxBufferLen(f.Type, length)) + } + return bufferLen + case *arrow.DenseUnionType: + // type codes + bufferLen = maxOf(length) + // offsets + bufferLen = maxOf(arrow.Int32SizeBytes * length) + // create children of length 1 + for _, f := range dt.Fields() { + bufferLen = maxOf(getMaxBufferLen(f.Type, 1)) + } + return bufferLen + case arrow.OffsetsDataType: + return maxOf(dt.OffsetTypeTraits().BytesRequired(length + 1)) + case *arrow.FixedSizeListType: + return maxOf(getMaxBufferLen(dt.Elem(), int(dt.Len())*length)) + case arrow.ExtensionType: + return maxOf(getMaxBufferLen(dt.StorageType(), length)) + default: + panic(fmt.Errorf("arrow/array: arrayofnull not implemented for type %s", dt)) + } +} + +type nullArrayFactory struct { + mem memory.Allocator + dt arrow.DataType + len int + buf *memory.Buffer +} + +func (n *nullArrayFactory) create() *Data { + if n.buf == nil { + bufLen := getMaxBufferLen(n.dt, n.len) + n.buf = memory.NewResizableBuffer(n.mem) + n.buf.Resize(bufLen) + defer n.buf.Release() + } + + var ( + dt = n.dt + bufs = []*memory.Buffer{memory.SliceBuffer(n.buf, 0, int(bitutil.BytesForBits(int64(n.len))))} + childData []arrow.ArrayData + dictData arrow.ArrayData + ) + defer bufs[0].Release() + + if ex, ok := dt.(arrow.ExtensionType); ok { + dt = ex.StorageType() + } + + if nf, ok := dt.(arrow.NestedType); ok { + childData = make([]arrow.ArrayData, len(nf.Fields())) + } + + switch dt := dt.(type) { + case *arrow.NullType: + case *arrow.DictionaryType: + bufs = append(bufs, n.buf) + arr := MakeArrayOfNull(n.mem, dt.ValueType, 0) + defer arr.Release() + dictData = arr.Data() + case arrow.FixedWidthDataType: + bufs = append(bufs, n.buf) + case arrow.BinaryDataType: + bufs = append(bufs, n.buf, n.buf) + case arrow.OffsetsDataType: + bufs = append(bufs, n.buf) + childData[0] = n.createChild(dt, 0, 0) + defer childData[0].Release() + case *arrow.FixedSizeListType: + childData[0] = n.createChild(dt, 0, n.len*int(dt.Len())) + defer childData[0].Release() + case *arrow.StructType: + for i := range dt.Fields() { + childData[i] = n.createChild(dt, i, n.len) + defer childData[i].Release() + } + case arrow.UnionType: + bufs[0].Release() + bufs[0] = nil + bufs = append(bufs, n.buf) + // buffer is zeroed, but 0 may not be a valid type code + if dt.TypeCodes()[0] != 0 { + bufs[1] = memory.NewResizableBuffer(n.mem) + bufs[1].Resize(n.len) + defer bufs[1].Release() + memory.Set(bufs[1].Bytes(), byte(dt.TypeCodes()[0])) + } + + // for sparse unions we create children with the same length + childLen := n.len + if dt.Mode() == arrow.DenseMode { + // for dense unions, offsets are all 0 and make children + // with length 1 + bufs = append(bufs, n.buf) + childLen = 1 + } + for i := range dt.Fields() { + childData[i] = n.createChild(dt, i, childLen) + defer childData[i].Release() + } + } + + out := NewData(n.dt, n.len, bufs, childData, n.len, 0) + if dictData != nil { + out.SetDictionary(dictData) + } + return out +} + +func (n *nullArrayFactory) createChild(dt arrow.DataType, i, length int) *Data { + childFactory := &nullArrayFactory{ + mem: n.mem, dt: n.dt.(arrow.NestedType).Fields()[i].Type, + len: length, buf: n.buf} + 
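+	// the child shares n.buf, which getMaxBufferLen sized to cover every nested child, so create() below will not allocate a new buffer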
return childFactory.create() +} + +// MakeArrayOfNull creates an array of size length which is all null of the given data type. +func MakeArrayOfNull(mem memory.Allocator, dt arrow.DataType, length int) arrow.Array { + data := (&nullArrayFactory{mem: mem, dt: dt, len: length}).create() + defer data.Release() + return MakeFromData(data) +} diff --git a/go/arrow/array/util_test.go b/go/arrow/array/util_test.go index 36b03a94d8153..9a1539f33f142 100644 --- a/go/arrow/array/util_test.go +++ b/go/arrow/array/util_test.go @@ -24,11 +24,11 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/decimal128" - "github.com/apache/arrow/go/v9/arrow/internal/arrdata" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/internal/arrdata" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/goccy/go-json" "github.com/stretchr/testify/assert" ) @@ -138,6 +138,27 @@ func TestStringsJSON(t *testing.T) { }) } + for _, tt := range tests { + t.Run("large json "+tt.jsonstring, func(t *testing.T) { + bldr := array.NewLargeStringBuilder(memory.DefaultAllocator) + defer bldr.Release() + + bldr.AppendValues(tt.values, tt.valids) + expected := bldr.NewLargeStringArray() + defer expected.Release() + + arr, _, err := array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.LargeString, strings.NewReader(tt.jsonstring)) + assert.NoError(t, err) + defer arr.Release() + + assert.Truef(t, array.ArrayEqual(expected, arr), "expected: %s\ngot: %s\n", expected, arr) + + data, err := json.Marshal(arr) + assert.NoError(t, err) + assert.JSONEq(t, tt.jsonstring, string(data)) + }) + } + t.Run("errors", func(t *testing.T) { _, _, err := array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String, strings.NewReader("[0]")) assert.Error(t, err) diff --git a/go/arrow/arrio/arrio.go b/go/arrow/arrio/arrio.go index 23687d3ef16ea..1115a772684d4 100644 --- a/go/arrow/arrio/arrio.go +++ b/go/arrow/arrio/arrio.go @@ -22,7 +22,7 @@ import ( "errors" "io" - "github.com/apache/arrow/go/v9/arrow" + "github.com/apache/arrow/go/v10/arrow" ) // Reader is the interface that wraps the Read method. 
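A minimal usage sketch for the new MakeArrayOfNull helper added in go/arrow/array/util.go above, using only types, constructors and the allocator already referenced in this diff; the struct field names below are purely illustrative, and the expected output is what the nullArrayFactory logic above implies rather than anything asserted by the change itself.

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func main() {
	// a nested type exercises the shared zeroed buffer path in nullArrayFactory
	dt := arrow.StructOf(
		arrow.Field{Name: "id", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
		arrow.Field{Name: "name", Type: arrow.BinaryTypes.String, Nullable: true},
	)

	arr := array.MakeArrayOfNull(memory.DefaultAllocator, dt, 5)
	defer arr.Release()

	// every slot of the struct array (and of its children) should be null
	fmt.Println(arr.Len(), arr.NullN()) // expected: 5 5
}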
diff --git a/go/arrow/arrio/arrio_test.go b/go/arrow/arrio/arrio_test.go index 087b59b1aef4e..57c75fcf0a4e5 100644 --- a/go/arrow/arrio/arrio_test.go +++ b/go/arrow/arrio/arrio_test.go @@ -23,11 +23,11 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/arrio" - "github.com/apache/arrow/go/v9/arrow/internal/arrdata" - "github.com/apache/arrow/go/v9/arrow/ipc" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/arrio" + "github.com/apache/arrow/go/v10/arrow/internal/arrdata" + "github.com/apache/arrow/go/v10/arrow/ipc" + "github.com/apache/arrow/go/v10/arrow/memory" ) type copyKind int @@ -64,11 +64,7 @@ func (k copyKind) check(t *testing.T, f *os.File, mem memory.Allocator, schema * } func TestCopy(t *testing.T) { - tempDir, err := ioutil.TempDir("", "go-arrow-copy-") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(tempDir) + tempDir := t.TempDir() for _, tc := range []struct { name string diff --git a/go/arrow/bitutil/Makefile b/go/arrow/bitutil/Makefile new file mode 100644 index 0000000000000..12dd1d3491745 --- /dev/null +++ b/go/arrow/bitutil/Makefile @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# this converts rotate instructions from "ro[lr] " -> "ro[lr] , 1" for yasm compatibility +PERL_FIXUP_ROTATE=perl -i -pe 's/(ro[rl]\s+\w{2,3})$$/\1, 1/' + +C2GOASM=c2goasm +CC=clang-11 +C_FLAGS=-target x86_64-unknown-none -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=1000 \ + -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -O3 -fno-builtin -ffast-math -fno-jump-tables -I_lib +ASM_FLAGS_AVX2=-mavx2 -mfma +ASM_FLAGS_SSE4=-msse4 +ASM_FLAGS_BMI2=-mbmi2 +ASM_FLAGS_POPCNT=-mpopcnt + +C_FLAGS_NEON=-O3 -fvectorize -mllvm -force-vector-width=16 -fno-asynchronous-unwind-tables -mno-red-zone -mstackrealign -fno-exceptions \ + -fno-rtti -fno-builtin -ffast-math -fno-jump-tables -I_lib + +GO_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -not -name '*_test.go') +ALL_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -name '*.s' -not -name '*_test.go') + +.PHONEY: assembly + +INTEL_SOURCES := \ + bitmap_ops_avx2_amd64.s bitmap_ops_sse4_amd64.s + +# +# ARROW-15336: DO NOT add the assembly target for Arm64 (ARM_SOURCES) until c2goasm added the Arm64 support. +# min_max_neon_arm64.s was generated by asm2plan9s. +# And manually formatted it as the Arm64 Plan9. 
+# + +assembly: $(INTEL_SOURCES) + +_lib/bitmap_ops_avx2_amd64.s: _lib/bitmap_ops.c + $(CC) -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ + +_lib/bitmap_ops_sse4_amd64.s: _lib/bitmap_ops.c + $(CC) -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ + +bitmap_ops_avx2_amd64.s: _lib/bitmap_ops_avx2_amd64.s + $(C2GOASM) -a -f $^ $@ + +bitmap_ops_sse4_amd64.s: _lib/bitmap_ops_sse4_amd64.s + $(C2GOASM) -a -f $^ $@ + +clean: + rm -f $(INTEL_SOURCES) + rm -f $(addprefix _lib/,$(INTEL_SOURCES)) diff --git a/go/arrow/bitutil/_lib/bitmap_ops.c b/go/arrow/bitutil/_lib/bitmap_ops.c new file mode 100644 index 0000000000000..96817b2f2b548 --- /dev/null +++ b/go/arrow/bitutil/_lib/bitmap_ops.c @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "../../../internal/utils/_lib/arch.h" +#include <stdint.h> + +// like elsewhere in this repo, this .c file gets compiled into optimized +// assembly and then converted to go plan9 assembly via c2goasm so we can +// call these functions. see the Makefile in the parent directory.
+ +void FULL_NAME(bitmap_aligned_and)(const uint8_t* left, const uint8_t* right, uint8_t* out, const int64_t nbytes) { + for (int64_t i = 0; i < nbytes; ++i) { + out[i] = left[i] & right[i]; + } +} + +void FULL_NAME(bitmap_aligned_or)(const uint8_t* left, const uint8_t* right, uint8_t* out, const int64_t nbytes) { + for (int64_t i = 0; i < nbytes; ++i) { + out[i] = left[i] | right[i]; + } +} \ No newline at end of file diff --git a/go/arrow/bitutil/_lib/bitmap_ops_avx2_amd64.s b/go/arrow/bitutil/_lib/bitmap_ops_avx2_amd64.s new file mode 100644 index 0000000000000..69f69d297083a --- /dev/null +++ b/go/arrow/bitutil/_lib/bitmap_ops_avx2_amd64.s @@ -0,0 +1,212 @@ + .text + .intel_syntax noprefix + .file "bitmap_ops.c" + .globl bitmap_aligned_and_avx2 # -- Begin function bitmap_aligned_and_avx2 + .p2align 4, 0x90 + .type bitmap_aligned_and_avx2,@function +bitmap_aligned_and_avx2: # @bitmap_aligned_and_avx2 +# %bb.0: + push rbp + mov rbp, rsp + push rbx + and rsp, -8 + test rcx, rcx + jle .LBB0_12 +# %bb.1: + cmp rcx, 127 + ja .LBB0_7 +# %bb.2: + xor r10d, r10d + jmp .LBB0_3 +.LBB0_7: + lea r9, [rdx + rcx] + lea rax, [rdi + rcx] + cmp rax, rdx + seta r11b + lea rax, [rsi + rcx] + cmp r9, rdi + seta bl + cmp rax, rdx + seta r8b + cmp r9, rsi + seta r9b + xor r10d, r10d + test r11b, bl + jne .LBB0_3 +# %bb.8: + and r8b, r9b + jne .LBB0_3 +# %bb.9: + mov r10, rcx + and r10, -128 + xor r8d, r8d + .p2align 4, 0x90 +.LBB0_10: # =>This Inner Loop Header: Depth=1 + vmovups ymm0, ymmword ptr [rsi + r8] + vmovups ymm1, ymmword ptr [rsi + r8 + 32] + vmovups ymm2, ymmword ptr [rsi + r8 + 64] + vmovups ymm3, ymmword ptr [rsi + r8 + 96] + vandps ymm0, ymm0, ymmword ptr [rdi + r8] + vandps ymm1, ymm1, ymmword ptr [rdi + r8 + 32] + vandps ymm2, ymm2, ymmword ptr [rdi + r8 + 64] + vandps ymm3, ymm3, ymmword ptr [rdi + r8 + 96] + vmovups ymmword ptr [rdx + r8], ymm0 + vmovups ymmword ptr [rdx + r8 + 32], ymm1 + vmovups ymmword ptr [rdx + r8 + 64], ymm2 + vmovups ymmword ptr [rdx + r8 + 96], ymm3 + sub r8, -128 + cmp r10, r8 + jne .LBB0_10 +# %bb.11: + cmp r10, rcx + je .LBB0_12 +.LBB0_3: + mov r8, r10 + not r8 + add r8, rcx + mov r9, rcx + and r9, 3 + je .LBB0_5 + .p2align 4, 0x90 +.LBB0_4: # =>This Inner Loop Header: Depth=1 + movzx eax, byte ptr [rsi + r10] + and al, byte ptr [rdi + r10] + mov byte ptr [rdx + r10], al + add r10, 1 + add r9, -1 + jne .LBB0_4 +.LBB0_5: + cmp r8, 3 + jb .LBB0_12 + .p2align 4, 0x90 +.LBB0_6: # =>This Inner Loop Header: Depth=1 + movzx eax, byte ptr [rsi + r10] + and al, byte ptr [rdi + r10] + mov byte ptr [rdx + r10], al + movzx eax, byte ptr [rsi + r10 + 1] + and al, byte ptr [rdi + r10 + 1] + mov byte ptr [rdx + r10 + 1], al + movzx eax, byte ptr [rsi + r10 + 2] + and al, byte ptr [rdi + r10 + 2] + mov byte ptr [rdx + r10 + 2], al + movzx eax, byte ptr [rsi + r10 + 3] + and al, byte ptr [rdi + r10 + 3] + mov byte ptr [rdx + r10 + 3], al + add r10, 4 + cmp rcx, r10 + jne .LBB0_6 +.LBB0_12: + lea rsp, [rbp - 8] + pop rbx + pop rbp + vzeroupper + ret +.Lfunc_end0: + .size bitmap_aligned_and_avx2, .Lfunc_end0-bitmap_aligned_and_avx2 + # -- End function + .globl bitmap_aligned_or_avx2 # -- Begin function bitmap_aligned_or_avx2 + .p2align 4, 0x90 + .type bitmap_aligned_or_avx2,@function +bitmap_aligned_or_avx2: # @bitmap_aligned_or_avx2 +# %bb.0: + push rbp + mov rbp, rsp + push rbx + and rsp, -8 + test rcx, rcx + jle .LBB1_12 +# %bb.1: + cmp rcx, 127 + ja .LBB1_7 +# %bb.2: + xor r10d, r10d + jmp .LBB1_3 +.LBB1_7: + lea r9, [rdx + rcx] + lea rax, [rdi + rcx] + cmp rax, rdx + seta 
r11b + lea rax, [rsi + rcx] + cmp r9, rdi + seta bl + cmp rax, rdx + seta r8b + cmp r9, rsi + seta r9b + xor r10d, r10d + test r11b, bl + jne .LBB1_3 +# %bb.8: + and r8b, r9b + jne .LBB1_3 +# %bb.9: + mov r10, rcx + and r10, -128 + xor r8d, r8d + .p2align 4, 0x90 +.LBB1_10: # =>This Inner Loop Header: Depth=1 + vmovups ymm0, ymmword ptr [rsi + r8] + vmovups ymm1, ymmword ptr [rsi + r8 + 32] + vmovups ymm2, ymmword ptr [rsi + r8 + 64] + vmovups ymm3, ymmword ptr [rsi + r8 + 96] + vorps ymm0, ymm0, ymmword ptr [rdi + r8] + vorps ymm1, ymm1, ymmword ptr [rdi + r8 + 32] + vorps ymm2, ymm2, ymmword ptr [rdi + r8 + 64] + vorps ymm3, ymm3, ymmword ptr [rdi + r8 + 96] + vmovups ymmword ptr [rdx + r8], ymm0 + vmovups ymmword ptr [rdx + r8 + 32], ymm1 + vmovups ymmword ptr [rdx + r8 + 64], ymm2 + vmovups ymmword ptr [rdx + r8 + 96], ymm3 + sub r8, -128 + cmp r10, r8 + jne .LBB1_10 +# %bb.11: + cmp r10, rcx + je .LBB1_12 +.LBB1_3: + mov r8, r10 + not r8 + add r8, rcx + mov r9, rcx + and r9, 3 + je .LBB1_5 + .p2align 4, 0x90 +.LBB1_4: # =>This Inner Loop Header: Depth=1 + movzx eax, byte ptr [rsi + r10] + or al, byte ptr [rdi + r10] + mov byte ptr [rdx + r10], al + add r10, 1 + add r9, -1 + jne .LBB1_4 +.LBB1_5: + cmp r8, 3 + jb .LBB1_12 + .p2align 4, 0x90 +.LBB1_6: # =>This Inner Loop Header: Depth=1 + movzx eax, byte ptr [rsi + r10] + or al, byte ptr [rdi + r10] + mov byte ptr [rdx + r10], al + movzx eax, byte ptr [rsi + r10 + 1] + or al, byte ptr [rdi + r10 + 1] + mov byte ptr [rdx + r10 + 1], al + movzx eax, byte ptr [rsi + r10 + 2] + or al, byte ptr [rdi + r10 + 2] + mov byte ptr [rdx + r10 + 2], al + movzx eax, byte ptr [rsi + r10 + 3] + or al, byte ptr [rdi + r10 + 3] + mov byte ptr [rdx + r10 + 3], al + add r10, 4 + cmp rcx, r10 + jne .LBB1_6 +.LBB1_12: + lea rsp, [rbp - 8] + pop rbx + pop rbp + vzeroupper + ret +.Lfunc_end1: + .size bitmap_aligned_or_avx2, .Lfunc_end1-bitmap_aligned_or_avx2 + # -- End function + .ident "Ubuntu clang version 11.1.0-6" + .section ".note.GNU-stack","",@progbits + .addrsig diff --git a/go/arrow/bitutil/_lib/bitmap_ops_sse4_amd64.s b/go/arrow/bitutil/_lib/bitmap_ops_sse4_amd64.s new file mode 100644 index 0000000000000..9d028155b7239 --- /dev/null +++ b/go/arrow/bitutil/_lib/bitmap_ops_sse4_amd64.s @@ -0,0 +1,272 @@ + .text + .intel_syntax noprefix + .file "bitmap_ops.c" + .globl bitmap_aligned_and_sse4 # -- Begin function bitmap_aligned_and_sse4 + .p2align 4, 0x90 + .type bitmap_aligned_and_sse4,@function +bitmap_aligned_and_sse4: # @bitmap_aligned_and_sse4 +# %bb.0: + push rbp + mov rbp, rsp + push rbx + and rsp, -8 + test rcx, rcx + jle .LBB0_16 +# %bb.1: + cmp rcx, 31 + ja .LBB0_7 +# %bb.2: + xor r11d, r11d +.LBB0_3: + mov r8, r11 + not r8 + add r8, rcx + mov r9, rcx + and r9, 3 + je .LBB0_5 + .p2align 4, 0x90 +.LBB0_4: # =>This Inner Loop Header: Depth=1 + movzx eax, byte ptr [rsi + r11] + and al, byte ptr [rdi + r11] + mov byte ptr [rdx + r11], al + add r11, 1 + add r9, -1 + jne .LBB0_4 +.LBB0_5: + cmp r8, 3 + jb .LBB0_16 + .p2align 4, 0x90 +.LBB0_6: # =>This Inner Loop Header: Depth=1 + movzx eax, byte ptr [rsi + r11] + and al, byte ptr [rdi + r11] + mov byte ptr [rdx + r11], al + movzx eax, byte ptr [rsi + r11 + 1] + and al, byte ptr [rdi + r11 + 1] + mov byte ptr [rdx + r11 + 1], al + movzx eax, byte ptr [rsi + r11 + 2] + and al, byte ptr [rdi + r11 + 2] + mov byte ptr [rdx + r11 + 2], al + movzx eax, byte ptr [rsi + r11 + 3] + and al, byte ptr [rdi + r11 + 3] + mov byte ptr [rdx + r11 + 3], al + add r11, 4 + cmp rcx, r11 + jne .LBB0_6 + jmp .LBB0_16 
+.LBB0_7: + lea r9, [rdx + rcx] + lea rax, [rdi + rcx] + cmp rax, rdx + seta r10b + lea rax, [rsi + rcx] + cmp r9, rdi + seta bl + cmp rax, rdx + seta r8b + cmp r9, rsi + seta r9b + xor r11d, r11d + test r10b, bl + jne .LBB0_3 +# %bb.8: + and r8b, r9b + jne .LBB0_3 +# %bb.9: + mov r11, rcx + and r11, -32 + lea rax, [r11 - 32] + mov r9, rax + shr r9, 5 + add r9, 1 + test rax, rax + je .LBB0_10 +# %bb.11: + mov r10, r9 + and r10, -2 + neg r10 + xor r8d, r8d + .p2align 4, 0x90 +.LBB0_12: # =>This Inner Loop Header: Depth=1 + movups xmm0, xmmword ptr [rdi + r8] + movups xmm1, xmmword ptr [rdi + r8 + 16] + movups xmm2, xmmword ptr [rsi + r8] + andps xmm2, xmm0 + movups xmm0, xmmword ptr [rsi + r8 + 16] + andps xmm0, xmm1 + movups xmmword ptr [rdx + r8], xmm2 + movups xmmword ptr [rdx + r8 + 16], xmm0 + movups xmm0, xmmword ptr [rdi + r8 + 32] + movups xmm1, xmmword ptr [rdi + r8 + 48] + movups xmm2, xmmword ptr [rsi + r8 + 32] + andps xmm2, xmm0 + movups xmm0, xmmword ptr [rsi + r8 + 48] + andps xmm0, xmm1 + movups xmmword ptr [rdx + r8 + 32], xmm2 + movups xmmword ptr [rdx + r8 + 48], xmm0 + add r8, 64 + add r10, 2 + jne .LBB0_12 +# %bb.13: + test r9b, 1 + je .LBB0_15 +.LBB0_14: + movups xmm0, xmmword ptr [rdi + r8] + movups xmm1, xmmword ptr [rdi + r8 + 16] + movups xmm2, xmmword ptr [rsi + r8] + andps xmm2, xmm0 + movups xmm0, xmmword ptr [rsi + r8 + 16] + andps xmm0, xmm1 + movups xmmword ptr [rdx + r8], xmm2 + movups xmmword ptr [rdx + r8 + 16], xmm0 +.LBB0_15: + cmp r11, rcx + jne .LBB0_3 +.LBB0_16: + lea rsp, [rbp - 8] + pop rbx + pop rbp + ret +.LBB0_10: + xor r8d, r8d + test r9b, 1 + jne .LBB0_14 + jmp .LBB0_15 +.Lfunc_end0: + .size bitmap_aligned_and_sse4, .Lfunc_end0-bitmap_aligned_and_sse4 + # -- End function + .globl bitmap_aligned_or_sse4 # -- Begin function bitmap_aligned_or_sse4 + .p2align 4, 0x90 + .type bitmap_aligned_or_sse4,@function +bitmap_aligned_or_sse4: # @bitmap_aligned_or_sse4 +# %bb.0: + push rbp + mov rbp, rsp + push rbx + and rsp, -8 + test rcx, rcx + jle .LBB1_16 +# %bb.1: + cmp rcx, 31 + ja .LBB1_7 +# %bb.2: + xor r11d, r11d +.LBB1_3: + mov r8, r11 + not r8 + add r8, rcx + mov r9, rcx + and r9, 3 + je .LBB1_5 + .p2align 4, 0x90 +.LBB1_4: # =>This Inner Loop Header: Depth=1 + movzx eax, byte ptr [rsi + r11] + or al, byte ptr [rdi + r11] + mov byte ptr [rdx + r11], al + add r11, 1 + add r9, -1 + jne .LBB1_4 +.LBB1_5: + cmp r8, 3 + jb .LBB1_16 + .p2align 4, 0x90 +.LBB1_6: # =>This Inner Loop Header: Depth=1 + movzx eax, byte ptr [rsi + r11] + or al, byte ptr [rdi + r11] + mov byte ptr [rdx + r11], al + movzx eax, byte ptr [rsi + r11 + 1] + or al, byte ptr [rdi + r11 + 1] + mov byte ptr [rdx + r11 + 1], al + movzx eax, byte ptr [rsi + r11 + 2] + or al, byte ptr [rdi + r11 + 2] + mov byte ptr [rdx + r11 + 2], al + movzx eax, byte ptr [rsi + r11 + 3] + or al, byte ptr [rdi + r11 + 3] + mov byte ptr [rdx + r11 + 3], al + add r11, 4 + cmp rcx, r11 + jne .LBB1_6 + jmp .LBB1_16 +.LBB1_7: + lea r9, [rdx + rcx] + lea rax, [rdi + rcx] + cmp rax, rdx + seta r10b + lea rax, [rsi + rcx] + cmp r9, rdi + seta bl + cmp rax, rdx + seta r8b + cmp r9, rsi + seta r9b + xor r11d, r11d + test r10b, bl + jne .LBB1_3 +# %bb.8: + and r8b, r9b + jne .LBB1_3 +# %bb.9: + mov r11, rcx + and r11, -32 + lea rax, [r11 - 32] + mov r9, rax + shr r9, 5 + add r9, 1 + test rax, rax + je .LBB1_10 +# %bb.11: + mov r10, r9 + and r10, -2 + neg r10 + xor r8d, r8d + .p2align 4, 0x90 +.LBB1_12: # =>This Inner Loop Header: Depth=1 + movups xmm0, xmmword ptr [rdi + r8] + movups xmm1, xmmword ptr [rdi + r8 + 16] 
+ movups xmm2, xmmword ptr [rsi + r8] + orps xmm2, xmm0 + movups xmm0, xmmword ptr [rsi + r8 + 16] + orps xmm0, xmm1 + movups xmmword ptr [rdx + r8], xmm2 + movups xmmword ptr [rdx + r8 + 16], xmm0 + movups xmm0, xmmword ptr [rdi + r8 + 32] + movups xmm1, xmmword ptr [rdi + r8 + 48] + movups xmm2, xmmword ptr [rsi + r8 + 32] + orps xmm2, xmm0 + movups xmm0, xmmword ptr [rsi + r8 + 48] + orps xmm0, xmm1 + movups xmmword ptr [rdx + r8 + 32], xmm2 + movups xmmword ptr [rdx + r8 + 48], xmm0 + add r8, 64 + add r10, 2 + jne .LBB1_12 +# %bb.13: + test r9b, 1 + je .LBB1_15 +.LBB1_14: + movups xmm0, xmmword ptr [rdi + r8] + movups xmm1, xmmword ptr [rdi + r8 + 16] + movups xmm2, xmmword ptr [rsi + r8] + orps xmm2, xmm0 + movups xmm0, xmmword ptr [rsi + r8 + 16] + orps xmm0, xmm1 + movups xmmword ptr [rdx + r8], xmm2 + movups xmmword ptr [rdx + r8 + 16], xmm0 +.LBB1_15: + cmp r11, rcx + jne .LBB1_3 +.LBB1_16: + lea rsp, [rbp - 8] + pop rbx + pop rbp + ret +.LBB1_10: + xor r8d, r8d + test r9b, 1 + jne .LBB1_14 + jmp .LBB1_15 +.Lfunc_end1: + .size bitmap_aligned_or_sse4, .Lfunc_end1-bitmap_aligned_or_sse4 + # -- End function + .ident "Ubuntu clang version 11.1.0-6" + .section ".note.GNU-stack","",@progbits + .addrsig diff --git a/go/arrow/bitutil/bitmap_ops.go b/go/arrow/bitutil/bitmap_ops.go new file mode 100644 index 0000000000000..62322b04b9d19 --- /dev/null +++ b/go/arrow/bitutil/bitmap_ops.go @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
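+// bitmap_ops.go holds the portable, word-at-a-time fallbacks for the aligned bitmap AND/OR kernels; the per-architecture files in this change wire either these or the SIMD kernels into bitAndOp/bitOrOp from their init() functions.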
+ +package bitutil + +func alignedBitAndGo(left, right, out []byte) { + var ( + nbytes = len(out) + i = 0 + ) + if nbytes > uint64SizeBytes { + // case where we have enough bytes to operate on words + leftWords := bytesToUint64(left[i:]) + rightWords := bytesToUint64(right[i:]) + outWords := bytesToUint64(out[i:]) + + for w := range outWords { + outWords[w] = leftWords[w] & rightWords[w] + } + + i += len(outWords) * uint64SizeBytes + } + // grab any remaining bytes that were fewer than a word + for ; i < nbytes; i++ { + out[i] = left[i] & right[i] + } +} + +func alignedBitOrGo(left, right, out []byte) { + var ( + nbytes = len(out) + i = 0 + ) + if nbytes > uint64SizeBytes { + // case where we have enough bytes to operate on words + leftWords := bytesToUint64(left[i:]) + rightWords := bytesToUint64(right[i:]) + outWords := bytesToUint64(out[i:]) + + for w := range outWords { + outWords[w] = leftWords[w] | rightWords[w] + } + + i += len(outWords) * uint64SizeBytes + } + // grab any remaining bytes that were fewer than a word + for ; i < nbytes; i++ { + out[i] = left[i] | right[i] + } +} diff --git a/go/arrow/bitutil/bitmap_ops_amd64.go b/go/arrow/bitutil/bitmap_ops_amd64.go new file mode 100644 index 0000000000000..9aa5a6dd56bec --- /dev/null +++ b/go/arrow/bitutil/bitmap_ops_amd64.go @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !noasm +// +build !noasm + +package bitutil + +import "golang.org/x/sys/cpu" + +func init() { + if cpu.X86.HasAVX2 { + bitAndOp.opAligned = bitmapAlignedAndAVX2 + bitOrOp.opAligned = bitmapAlignedOrAVX2 + } else if cpu.X86.HasSSE42 { + bitAndOp.opAligned = bitmapAlignedAndSSE4 + bitOrOp.opAligned = bitmapAlignedOrSSE4 + } else { + bitAndOp.opAligned = alignedBitAndGo + bitOrOp.opAligned = alignedBitOrGo + } +} diff --git a/go/arrow/bitutil/bitmap_ops_arm64.go b/go/arrow/bitutil/bitmap_ops_arm64.go new file mode 100644 index 0000000000000..86c47639a9e80 --- /dev/null +++ b/go/arrow/bitutil/bitmap_ops_arm64.go @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !noasm +// +build !noasm + +package bitutil + +func init() { + bitAndOp.opAligned = alignedBitAndGo + bitOrOp.opAligned = alignedBitOrGo +} diff --git a/go/arrow/bitutil/bitmap_ops_avx2_amd64.go b/go/arrow/bitutil/bitmap_ops_avx2_amd64.go new file mode 100644 index 0000000000000..731b9807b79a1 --- /dev/null +++ b/go/arrow/bitutil/bitmap_ops_avx2_amd64.go @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !noasm +// +build !noasm + +package bitutil + +import ( + "unsafe" +) + +//go:noescape +func _bitmap_aligned_and_avx2(left, right, out unsafe.Pointer, length int64) + +func bitmapAlignedAndAVX2(left, right, out []byte) { + _bitmap_aligned_and_avx2(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) +} + +//go:noescape +func _bitmap_aligned_or_avx2(left, right, out unsafe.Pointer, length int64) + +func bitmapAlignedOrAVX2(left, right, out []byte) { + _bitmap_aligned_or_avx2(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) +} diff --git a/go/arrow/bitutil/bitmap_ops_avx2_amd64.s b/go/arrow/bitutil/bitmap_ops_avx2_amd64.s new file mode 100644 index 0000000000000..2e2ade89617ce --- /dev/null +++ b/go/arrow/bitutil/bitmap_ops_avx2_amd64.s @@ -0,0 +1,192 @@ +//+build !noasm !appengine +// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT + +TEXT ·_bitmap_aligned_and_avx2(SB), $0-32 + + MOVQ left+0(FP), DI + MOVQ right+8(FP), SI + MOVQ out+16(FP), DX + MOVQ length+24(FP), CX + + WORD $0x8548; BYTE $0xc9 // test rcx, rcx + JLE LBB0_12 + LONG $0x7ff98348 // cmp rcx, 127 + JA LBB0_7 + WORD $0x3145; BYTE $0xd2 // xor r10d, r10d + JMP LBB0_3 + +LBB0_7: + LONG $0x0a0c8d4c // lea r9, [rdx + rcx] + LONG $0x0f048d48 // lea rax, [rdi + rcx] + WORD $0x3948; BYTE $0xd0 // cmp rax, rdx + LONG $0xd3970f41 // seta r11b + LONG $0x0e048d48 // lea rax, [rsi + rcx] + WORD $0x3949; BYTE $0xf9 // cmp r9, rdi + WORD $0x970f; BYTE $0xd3 // seta bl + WORD $0x3948; BYTE $0xd0 // cmp rax, rdx + LONG $0xd0970f41 // seta r8b + WORD $0x3949; BYTE $0xf1 // cmp r9, rsi + LONG $0xd1970f41 // seta r9b + WORD $0x3145; BYTE $0xd2 // xor r10d, r10d + WORD $0x8441; BYTE $0xdb // test r11b, bl + JNE LBB0_3 + WORD $0x2045; BYTE $0xc8 // and r8b, r9b + JNE LBB0_3 + WORD $0x8949; BYTE $0xca // mov r10, rcx + LONG $0x80e28349 // and r10, -128 + WORD $0x3145; BYTE $0xc0 // xor r8d, r8d + +LBB0_10: + LONG $0x107ca1c4; WORD $0x0604 // vmovups ymm0, yword [rsi + r8] + LONG $0x107ca1c4; WORD $0x064c; BYTE $0x20 // vmovups ymm1, yword [rsi + r8 + 32] + LONG $0x107ca1c4; WORD $0x0654; BYTE $0x40 // vmovups ymm2, yword [rsi + r8 + 64] + LONG $0x107ca1c4; WORD $0x065c; BYTE 
$0x60 // vmovups ymm3, yword [rsi + r8 + 96] + LONG $0x547ca1c4; WORD $0x0704 // vandps ymm0, ymm0, yword [rdi + r8] + LONG $0x5474a1c4; WORD $0x074c; BYTE $0x20 // vandps ymm1, ymm1, yword [rdi + r8 + 32] + LONG $0x546ca1c4; WORD $0x0754; BYTE $0x40 // vandps ymm2, ymm2, yword [rdi + r8 + 64] + LONG $0x5464a1c4; WORD $0x075c; BYTE $0x60 // vandps ymm3, ymm3, yword [rdi + r8 + 96] + LONG $0x117ca1c4; WORD $0x0204 // vmovups yword [rdx + r8], ymm0 + LONG $0x117ca1c4; WORD $0x024c; BYTE $0x20 // vmovups yword [rdx + r8 + 32], ymm1 + LONG $0x117ca1c4; WORD $0x0254; BYTE $0x40 // vmovups yword [rdx + r8 + 64], ymm2 + LONG $0x117ca1c4; WORD $0x025c; BYTE $0x60 // vmovups yword [rdx + r8 + 96], ymm3 + LONG $0x80e88349 // sub r8, -128 + WORD $0x394d; BYTE $0xc2 // cmp r10, r8 + JNE LBB0_10 + WORD $0x3949; BYTE $0xca // cmp r10, rcx + JE LBB0_12 + +LBB0_3: + WORD $0x894d; BYTE $0xd0 // mov r8, r10 + WORD $0xf749; BYTE $0xd0 // not r8 + WORD $0x0149; BYTE $0xc8 // add r8, rcx + WORD $0x8949; BYTE $0xc9 // mov r9, rcx + LONG $0x03e18349 // and r9, 3 + JE LBB0_5 + +LBB0_4: + LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] + LONG $0x17042242 // and al, byte [rdi + r10] + LONG $0x12048842 // mov byte [rdx + r10], al + LONG $0x01c28349 // add r10, 1 + LONG $0xffc18349 // add r9, -1 + JNE LBB0_4 + +LBB0_5: + LONG $0x03f88349 // cmp r8, 3 + JB LBB0_12 + +LBB0_6: + LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] + LONG $0x17042242 // and al, byte [rdi + r10] + LONG $0x12048842 // mov byte [rdx + r10], al + LONG $0x44b60f42; WORD $0x0116 // movzx eax, byte [rsi + r10 + 1] + LONG $0x17442242; BYTE $0x01 // and al, byte [rdi + r10 + 1] + LONG $0x12448842; BYTE $0x01 // mov byte [rdx + r10 + 1], al + LONG $0x44b60f42; WORD $0x0216 // movzx eax, byte [rsi + r10 + 2] + LONG $0x17442242; BYTE $0x02 // and al, byte [rdi + r10 + 2] + LONG $0x12448842; BYTE $0x02 // mov byte [rdx + r10 + 2], al + LONG $0x44b60f42; WORD $0x0316 // movzx eax, byte [rsi + r10 + 3] + LONG $0x17442242; BYTE $0x03 // and al, byte [rdi + r10 + 3] + LONG $0x12448842; BYTE $0x03 // mov byte [rdx + r10 + 3], al + LONG $0x04c28349 // add r10, 4 + WORD $0x394c; BYTE $0xd1 // cmp rcx, r10 + JNE LBB0_6 + +LBB0_12: + VZEROUPPER + RET + +TEXT ·_bitmap_aligned_or_avx2(SB), $0-32 + + MOVQ left+0(FP), DI + MOVQ right+8(FP), SI + MOVQ out+16(FP), DX + MOVQ length+24(FP), CX + + WORD $0x8548; BYTE $0xc9 // test rcx, rcx + JLE LBB1_12 + LONG $0x7ff98348 // cmp rcx, 127 + JA LBB1_7 + WORD $0x3145; BYTE $0xd2 // xor r10d, r10d + JMP LBB1_3 + +LBB1_7: + LONG $0x0a0c8d4c // lea r9, [rdx + rcx] + LONG $0x0f048d48 // lea rax, [rdi + rcx] + WORD $0x3948; BYTE $0xd0 // cmp rax, rdx + LONG $0xd3970f41 // seta r11b + LONG $0x0e048d48 // lea rax, [rsi + rcx] + WORD $0x3949; BYTE $0xf9 // cmp r9, rdi + WORD $0x970f; BYTE $0xd3 // seta bl + WORD $0x3948; BYTE $0xd0 // cmp rax, rdx + LONG $0xd0970f41 // seta r8b + WORD $0x3949; BYTE $0xf1 // cmp r9, rsi + LONG $0xd1970f41 // seta r9b + WORD $0x3145; BYTE $0xd2 // xor r10d, r10d + WORD $0x8441; BYTE $0xdb // test r11b, bl + JNE LBB1_3 + WORD $0x2045; BYTE $0xc8 // and r8b, r9b + JNE LBB1_3 + WORD $0x8949; BYTE $0xca // mov r10, rcx + LONG $0x80e28349 // and r10, -128 + WORD $0x3145; BYTE $0xc0 // xor r8d, r8d + +LBB1_10: + LONG $0x107ca1c4; WORD $0x0604 // vmovups ymm0, yword [rsi + r8] + LONG $0x107ca1c4; WORD $0x064c; BYTE $0x20 // vmovups ymm1, yword [rsi + r8 + 32] + LONG $0x107ca1c4; WORD $0x0654; BYTE $0x40 // vmovups ymm2, yword [rsi + r8 + 64] + LONG $0x107ca1c4; WORD $0x065c; BYTE $0x60 
// vmovups ymm3, yword [rsi + r8 + 96] + LONG $0x567ca1c4; WORD $0x0704 // vorps ymm0, ymm0, yword [rdi + r8] + LONG $0x5674a1c4; WORD $0x074c; BYTE $0x20 // vorps ymm1, ymm1, yword [rdi + r8 + 32] + LONG $0x566ca1c4; WORD $0x0754; BYTE $0x40 // vorps ymm2, ymm2, yword [rdi + r8 + 64] + LONG $0x5664a1c4; WORD $0x075c; BYTE $0x60 // vorps ymm3, ymm3, yword [rdi + r8 + 96] + LONG $0x117ca1c4; WORD $0x0204 // vmovups yword [rdx + r8], ymm0 + LONG $0x117ca1c4; WORD $0x024c; BYTE $0x20 // vmovups yword [rdx + r8 + 32], ymm1 + LONG $0x117ca1c4; WORD $0x0254; BYTE $0x40 // vmovups yword [rdx + r8 + 64], ymm2 + LONG $0x117ca1c4; WORD $0x025c; BYTE $0x60 // vmovups yword [rdx + r8 + 96], ymm3 + LONG $0x80e88349 // sub r8, -128 + WORD $0x394d; BYTE $0xc2 // cmp r10, r8 + JNE LBB1_10 + WORD $0x3949; BYTE $0xca // cmp r10, rcx + JE LBB1_12 + +LBB1_3: + WORD $0x894d; BYTE $0xd0 // mov r8, r10 + WORD $0xf749; BYTE $0xd0 // not r8 + WORD $0x0149; BYTE $0xc8 // add r8, rcx + WORD $0x8949; BYTE $0xc9 // mov r9, rcx + LONG $0x03e18349 // and r9, 3 + JE LBB1_5 + +LBB1_4: + LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] + LONG $0x17040a42 // or al, byte [rdi + r10] + LONG $0x12048842 // mov byte [rdx + r10], al + LONG $0x01c28349 // add r10, 1 + LONG $0xffc18349 // add r9, -1 + JNE LBB1_4 + +LBB1_5: + LONG $0x03f88349 // cmp r8, 3 + JB LBB1_12 + +LBB1_6: + LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] + LONG $0x17040a42 // or al, byte [rdi + r10] + LONG $0x12048842 // mov byte [rdx + r10], al + LONG $0x44b60f42; WORD $0x0116 // movzx eax, byte [rsi + r10 + 1] + LONG $0x17440a42; BYTE $0x01 // or al, byte [rdi + r10 + 1] + LONG $0x12448842; BYTE $0x01 // mov byte [rdx + r10 + 1], al + LONG $0x44b60f42; WORD $0x0216 // movzx eax, byte [rsi + r10 + 2] + LONG $0x17440a42; BYTE $0x02 // or al, byte [rdi + r10 + 2] + LONG $0x12448842; BYTE $0x02 // mov byte [rdx + r10 + 2], al + LONG $0x44b60f42; WORD $0x0316 // movzx eax, byte [rsi + r10 + 3] + LONG $0x17440a42; BYTE $0x03 // or al, byte [rdi + r10 + 3] + LONG $0x12448842; BYTE $0x03 // mov byte [rdx + r10 + 3], al + LONG $0x04c28349 // add r10, 4 + WORD $0x394c; BYTE $0xd1 // cmp rcx, r10 + JNE LBB1_6 + +LBB1_12: + VZEROUPPER + RET diff --git a/go/arrow/bitutil/bitmap_ops_noasm.go b/go/arrow/bitutil/bitmap_ops_noasm.go new file mode 100644 index 0000000000000..785531c1c23f3 --- /dev/null +++ b/go/arrow/bitutil/bitmap_ops_noasm.go @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build noasm +// +build noasm + +package bitutil + +func init() { + bitAndOp.opAligned = alignedBitAndGo + bitOrOp.opAligned = alignedBitOrGo +} diff --git a/go/arrow/bitutil/bitmap_ops_ppc64le.go b/go/arrow/bitutil/bitmap_ops_ppc64le.go new file mode 100644 index 0000000000000..86c47639a9e80 --- /dev/null +++ b/go/arrow/bitutil/bitmap_ops_ppc64le.go @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !noasm +// +build !noasm + +package bitutil + +func init() { + bitAndOp.opAligned = alignedBitAndGo + bitOrOp.opAligned = alignedBitOrGo +} diff --git a/go/arrow/bitutil/bitmap_ops_s390x.go b/go/arrow/bitutil/bitmap_ops_s390x.go new file mode 100644 index 0000000000000..86c47639a9e80 --- /dev/null +++ b/go/arrow/bitutil/bitmap_ops_s390x.go @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !noasm +// +build !noasm + +package bitutil + +func init() { + bitAndOp.opAligned = alignedBitAndGo + bitOrOp.opAligned = alignedBitOrGo +} diff --git a/go/arrow/bitutil/bitmap_ops_sse4_amd64.go b/go/arrow/bitutil/bitmap_ops_sse4_amd64.go new file mode 100644 index 0000000000000..5d1fcf96829b3 --- /dev/null +++ b/go/arrow/bitutil/bitmap_ops_sse4_amd64.go @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !noasm +// +build !noasm + +package bitutil + +import ( + "unsafe" +) + +//go:noescape +func _bitmap_aligned_and_sse4(left, right, out unsafe.Pointer, length int64) + +func bitmapAlignedAndSSE4(left, right, out []byte) { + _bitmap_aligned_and_sse4(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) +} + +//go:noescape +func _bitmap_aligned_or_sse4(left, right, out unsafe.Pointer, length int64) + +func bitmapAlignedOrSSE4(left, right, out []byte) { + _bitmap_aligned_or_sse4(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) +} diff --git a/go/arrow/bitutil/bitmap_ops_sse4_amd64.s b/go/arrow/bitutil/bitmap_ops_sse4_amd64.s new file mode 100644 index 0000000000000..ad81cf63720bd --- /dev/null +++ b/go/arrow/bitutil/bitmap_ops_sse4_amd64.s @@ -0,0 +1,256 @@ +//+build !noasm !appengine +// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT + +TEXT ·_bitmap_aligned_and_sse4(SB), $0-32 + + MOVQ left+0(FP), DI + MOVQ right+8(FP), SI + MOVQ out+16(FP), DX + MOVQ length+24(FP), CX + + WORD $0x8548; BYTE $0xc9 // test rcx, rcx + JLE LBB0_16 + LONG $0x1ff98348 // cmp rcx, 31 + JA LBB0_7 + WORD $0x3145; BYTE $0xdb // xor r11d, r11d + +LBB0_3: + WORD $0x894d; BYTE $0xd8 // mov r8, r11 + WORD $0xf749; BYTE $0xd0 // not r8 + WORD $0x0149; BYTE $0xc8 // add r8, rcx + WORD $0x8949; BYTE $0xc9 // mov r9, rcx + LONG $0x03e18349 // and r9, 3 + JE LBB0_5 + +LBB0_4: + LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] + LONG $0x1f042242 // and al, byte [rdi + r11] + LONG $0x1a048842 // mov byte [rdx + r11], al + LONG $0x01c38349 // add r11, 1 + LONG $0xffc18349 // add r9, -1 + JNE LBB0_4 + +LBB0_5: + LONG $0x03f88349 // cmp r8, 3 + JB LBB0_16 + +LBB0_6: + LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] + LONG $0x1f042242 // and al, byte [rdi + r11] + LONG $0x1a048842 // mov byte [rdx + r11], al + LONG $0x44b60f42; WORD $0x011e // movzx eax, byte [rsi + r11 + 1] + LONG $0x1f442242; BYTE $0x01 // and al, byte [rdi + r11 + 1] + LONG $0x1a448842; BYTE $0x01 // mov byte [rdx + r11 + 1], al + LONG $0x44b60f42; WORD $0x021e // movzx eax, byte [rsi + r11 + 2] + LONG $0x1f442242; BYTE $0x02 // and al, byte [rdi + r11 + 2] + LONG $0x1a448842; BYTE $0x02 // mov byte [rdx + r11 + 2], al + LONG $0x44b60f42; WORD $0x031e // movzx eax, byte [rsi + r11 + 3] + LONG $0x1f442242; BYTE $0x03 // and al, byte [rdi + r11 + 3] + LONG $0x1a448842; BYTE $0x03 // mov byte [rdx + r11 + 3], al + LONG $0x04c38349 // add r11, 4 + WORD $0x394c; BYTE $0xd9 // cmp rcx, r11 + JNE LBB0_6 + JMP LBB0_16 + +LBB0_7: + LONG $0x0a0c8d4c // lea r9, [rdx + rcx] + LONG $0x0f048d48 // lea rax, [rdi + rcx] + WORD $0x3948; BYTE $0xd0 // cmp rax, rdx + LONG $0xd2970f41 // seta r10b + LONG $0x0e048d48 // lea rax, [rsi + rcx] + WORD $0x3949; BYTE $0xf9 // cmp r9, rdi + WORD $0x970f; BYTE $0xd3 // seta bl + WORD $0x3948; BYTE $0xd0 // cmp rax, rdx + LONG $0xd0970f41 // seta r8b + WORD $0x3949; BYTE $0xf1 // cmp r9, rsi + LONG $0xd1970f41 // seta r9b + WORD $0x3145; BYTE $0xdb // xor r11d, r11d + WORD $0x8441; BYTE $0xda // test r10b, bl + JNE LBB0_3 + WORD $0x2045; BYTE $0xc8 // and r8b, r9b + JNE LBB0_3 + WORD $0x8949; BYTE $0xcb // mov r11, rcx + LONG $0xe0e38349 // and r11, -32 + LONG $0xe0438d49 // lea rax, [r11 - 32] + WORD $0x8949; BYTE $0xc1 // mov r9, rax + LONG $0x05e9c149 // shr r9, 5 + LONG $0x01c18349 // add r9, 1 + WORD $0x8548; BYTE 
$0xc0 // test rax, rax + JE LBB0_10 + WORD $0x894d; BYTE $0xca // mov r10, r9 + LONG $0xfee28349 // and r10, -2 + WORD $0xf749; BYTE $0xda // neg r10 + WORD $0x3145; BYTE $0xc0 // xor r8d, r8d + +LBB0_12: + LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] + LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] + LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] + WORD $0x540f; BYTE $0xd0 // andps xmm2, xmm0 + LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] + WORD $0x540f; BYTE $0xc1 // andps xmm0, xmm1 + LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 + LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 + LONG $0x44100f42; WORD $0x2007 // movups xmm0, oword [rdi + r8 + 32] + LONG $0x4c100f42; WORD $0x3007 // movups xmm1, oword [rdi + r8 + 48] + LONG $0x54100f42; WORD $0x2006 // movups xmm2, oword [rsi + r8 + 32] + WORD $0x540f; BYTE $0xd0 // andps xmm2, xmm0 + LONG $0x44100f42; WORD $0x3006 // movups xmm0, oword [rsi + r8 + 48] + WORD $0x540f; BYTE $0xc1 // andps xmm0, xmm1 + LONG $0x54110f42; WORD $0x2002 // movups oword [rdx + r8 + 32], xmm2 + LONG $0x44110f42; WORD $0x3002 // movups oword [rdx + r8 + 48], xmm0 + LONG $0x40c08349 // add r8, 64 + LONG $0x02c28349 // add r10, 2 + JNE LBB0_12 + LONG $0x01c1f641 // test r9b, 1 + JE LBB0_15 + +LBB0_14: + LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] + LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] + LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] + WORD $0x540f; BYTE $0xd0 // andps xmm2, xmm0 + LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] + WORD $0x540f; BYTE $0xc1 // andps xmm0, xmm1 + LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 + LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 + +LBB0_15: + WORD $0x3949; BYTE $0xcb // cmp r11, rcx + JNE LBB0_3 + +LBB0_16: + RET + +LBB0_10: + WORD $0x3145; BYTE $0xc0 // xor r8d, r8d + LONG $0x01c1f641 // test r9b, 1 + JNE LBB0_14 + JMP LBB0_15 + +TEXT ·_bitmap_aligned_or_sse4(SB), $0-32 + + MOVQ left+0(FP), DI + MOVQ right+8(FP), SI + MOVQ out+16(FP), DX + MOVQ length+24(FP), CX + + WORD $0x8548; BYTE $0xc9 // test rcx, rcx + JLE LBB1_16 + LONG $0x1ff98348 // cmp rcx, 31 + JA LBB1_7 + WORD $0x3145; BYTE $0xdb // xor r11d, r11d + +LBB1_3: + WORD $0x894d; BYTE $0xd8 // mov r8, r11 + WORD $0xf749; BYTE $0xd0 // not r8 + WORD $0x0149; BYTE $0xc8 // add r8, rcx + WORD $0x8949; BYTE $0xc9 // mov r9, rcx + LONG $0x03e18349 // and r9, 3 + JE LBB1_5 + +LBB1_4: + LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] + LONG $0x1f040a42 // or al, byte [rdi + r11] + LONG $0x1a048842 // mov byte [rdx + r11], al + LONG $0x01c38349 // add r11, 1 + LONG $0xffc18349 // add r9, -1 + JNE LBB1_4 + +LBB1_5: + LONG $0x03f88349 // cmp r8, 3 + JB LBB1_16 + +LBB1_6: + LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] + LONG $0x1f040a42 // or al, byte [rdi + r11] + LONG $0x1a048842 // mov byte [rdx + r11], al + LONG $0x44b60f42; WORD $0x011e // movzx eax, byte [rsi + r11 + 1] + LONG $0x1f440a42; BYTE $0x01 // or al, byte [rdi + r11 + 1] + LONG $0x1a448842; BYTE $0x01 // mov byte [rdx + r11 + 1], al + LONG $0x44b60f42; WORD $0x021e // movzx eax, byte [rsi + r11 + 2] + LONG $0x1f440a42; BYTE $0x02 // or al, byte [rdi + r11 + 2] + LONG $0x1a448842; BYTE $0x02 // mov byte [rdx + r11 + 2], al + LONG $0x44b60f42; WORD $0x031e // movzx eax, byte [rsi + r11 + 3] + LONG $0x1f440a42; BYTE $0x03 // or al, byte [rdi + r11 + 3] 
+ LONG $0x1a448842; BYTE $0x03 // mov byte [rdx + r11 + 3], al + LONG $0x04c38349 // add r11, 4 + WORD $0x394c; BYTE $0xd9 // cmp rcx, r11 + JNE LBB1_6 + JMP LBB1_16 + +LBB1_7: + LONG $0x0a0c8d4c // lea r9, [rdx + rcx] + LONG $0x0f048d48 // lea rax, [rdi + rcx] + WORD $0x3948; BYTE $0xd0 // cmp rax, rdx + LONG $0xd2970f41 // seta r10b + LONG $0x0e048d48 // lea rax, [rsi + rcx] + WORD $0x3949; BYTE $0xf9 // cmp r9, rdi + WORD $0x970f; BYTE $0xd3 // seta bl + WORD $0x3948; BYTE $0xd0 // cmp rax, rdx + LONG $0xd0970f41 // seta r8b + WORD $0x3949; BYTE $0xf1 // cmp r9, rsi + LONG $0xd1970f41 // seta r9b + WORD $0x3145; BYTE $0xdb // xor r11d, r11d + WORD $0x8441; BYTE $0xda // test r10b, bl + JNE LBB1_3 + WORD $0x2045; BYTE $0xc8 // and r8b, r9b + JNE LBB1_3 + WORD $0x8949; BYTE $0xcb // mov r11, rcx + LONG $0xe0e38349 // and r11, -32 + LONG $0xe0438d49 // lea rax, [r11 - 32] + WORD $0x8949; BYTE $0xc1 // mov r9, rax + LONG $0x05e9c149 // shr r9, 5 + LONG $0x01c18349 // add r9, 1 + WORD $0x8548; BYTE $0xc0 // test rax, rax + JE LBB1_10 + WORD $0x894d; BYTE $0xca // mov r10, r9 + LONG $0xfee28349 // and r10, -2 + WORD $0xf749; BYTE $0xda // neg r10 + WORD $0x3145; BYTE $0xc0 // xor r8d, r8d + +LBB1_12: + LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] + LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] + LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] + WORD $0x560f; BYTE $0xd0 // orps xmm2, xmm0 + LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] + WORD $0x560f; BYTE $0xc1 // orps xmm0, xmm1 + LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 + LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 + LONG $0x44100f42; WORD $0x2007 // movups xmm0, oword [rdi + r8 + 32] + LONG $0x4c100f42; WORD $0x3007 // movups xmm1, oword [rdi + r8 + 48] + LONG $0x54100f42; WORD $0x2006 // movups xmm2, oword [rsi + r8 + 32] + WORD $0x560f; BYTE $0xd0 // orps xmm2, xmm0 + LONG $0x44100f42; WORD $0x3006 // movups xmm0, oword [rsi + r8 + 48] + WORD $0x560f; BYTE $0xc1 // orps xmm0, xmm1 + LONG $0x54110f42; WORD $0x2002 // movups oword [rdx + r8 + 32], xmm2 + LONG $0x44110f42; WORD $0x3002 // movups oword [rdx + r8 + 48], xmm0 + LONG $0x40c08349 // add r8, 64 + LONG $0x02c28349 // add r10, 2 + JNE LBB1_12 + LONG $0x01c1f641 // test r9b, 1 + JE LBB1_15 + +LBB1_14: + LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] + LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] + LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] + WORD $0x560f; BYTE $0xd0 // orps xmm2, xmm0 + LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] + WORD $0x560f; BYTE $0xc1 // orps xmm0, xmm1 + LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 + LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 + +LBB1_15: + WORD $0x3949; BYTE $0xcb // cmp r11, rcx + JNE LBB1_3 + +LBB1_16: + RET + +LBB1_10: + WORD $0x3145; BYTE $0xc0 // xor r8d, r8d + LONG $0x01c1f641 // test r9b, 1 + JNE LBB1_14 + JMP LBB1_15 diff --git a/go/arrow/bitutil/bitmaps.go b/go/arrow/bitutil/bitmaps.go index 33bebf389cc51..10fa02797797a 100644 --- a/go/arrow/bitutil/bitmaps.go +++ b/go/arrow/bitutil/bitmaps.go @@ -17,11 +17,13 @@ package bitutil import ( + "bytes" "math/bits" "unsafe" - "github.com/apache/arrow/go/v9/arrow/endian" - "github.com/apache/arrow/go/v9/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/endian" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + 
"github.com/apache/arrow/go/v10/arrow/memory" ) // BitmapReader is a simple bitmap reader for a byte slice. @@ -202,7 +204,7 @@ func NewBitmapWordReader(bitmap []byte, offset, length int) *BitmapWordReader { if bm.nwords > 0 { bm.curword = toFromLEFunc(endian.Native.Uint64(bm.bitmap)) - } else { + } else if length > 0 { setLSB(&bm.curword, bm.bitmap[0]) } return bm @@ -422,3 +424,152 @@ func CopyBitmap(src []byte, srcOffset, length int, dst []byte, dstOffset int) { dst[nbytes-1] &= ^trailMask dst[nbytes-1] |= lastData & trailMask } + +type bitOp struct { + opWord func(uint64, uint64) uint64 + opByte func(byte, byte) byte + opAligned func(l, r, o []byte) +} + +var ( + bitAndOp = bitOp{ + opWord: func(l, r uint64) uint64 { return l & r }, + opByte: func(l, r byte) byte { return l & r }, + } + bitOrOp = bitOp{ + opWord: func(l, r uint64) uint64 { return l | r }, + opByte: func(l, r byte) byte { return l | r }, + } +) + +func alignedBitmapOp(op bitOp, left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { + debug.Assert(lOffset%8 == rOffset%8, "aligned bitmap op called with unaligned offsets") + debug.Assert(lOffset%8 == outOffset%8, "aligned bitmap op called with unaligned output offset") + + nbytes := BytesForBits(length + lOffset%8) + left = left[lOffset/8:] + right = right[rOffset/8:] + out = out[outOffset/8:] + endMask := (lOffset + length%8) + switch nbytes { + case 0: + return + case 1: // everything within a single byte + // (length+lOffset%8) <= 8 + mask := PrecedingBitmask[lOffset%8] + if endMask != 0 { + mask |= TrailingBitmask[(lOffset+length)%8] + } + out[0] = (out[0] & mask) | (op.opByte(left[0], right[0]) &^ mask) + case 2: // don't send zero length to opAligned + firstByteMask := PrecedingBitmask[lOffset%8] + out[0] = (out[0] & firstByteMask) | (op.opByte(left[0], right[0]) &^ firstByteMask) + lastByteMask := byte(0) + if endMask != 0 { + lastByteMask = TrailingBitmask[(lOffset+length)%8] + } + out[1] = (out[1] & lastByteMask) | (op.opByte(left[1], right[1]) &^ lastByteMask) + default: + firstByteMask := PrecedingBitmask[lOffset%8] + out[0] = (out[0] & firstByteMask) | (op.opByte(left[0], right[0]) &^ firstByteMask) + + op.opAligned(left[1:nbytes-1], right[1:nbytes-1], out[1:nbytes-1]) + + lastByteMask := byte(0) + if endMask != 0 { + lastByteMask = TrailingBitmask[(lOffset+length)%8] + } + out[nbytes-1] = (out[nbytes-1] & lastByteMask) | (op.opByte(left[nbytes-1], right[nbytes-1]) &^ lastByteMask) + } +} + +func unalignedBitmapOp(op bitOp, left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { + leftRdr := NewBitmapWordReader(left, int(lOffset), int(length)) + rightRdr := NewBitmapWordReader(right, int(rOffset), int(length)) + writer := NewBitmapWordWriter(out, int(outOffset), int(length)) + + for nwords := leftRdr.Words(); nwords > 0; nwords-- { + writer.PutNextWord(op.opWord(leftRdr.NextWord(), rightRdr.NextWord())) + } + for nbytes := leftRdr.TrailingBytes(); nbytes > 0; nbytes-- { + leftByte, leftValid := leftRdr.NextTrailingByte() + rightByte, rightValid := rightRdr.NextTrailingByte() + debug.Assert(leftValid == rightValid, "unexpected mismatch of valid bits") + writer.PutNextTrailingByte(op.opByte(leftByte, rightByte), leftValid) + } +} + +func BitmapOp(op bitOp, left, right []byte, lOffset, rOffset int64, out []byte, outOffset, length int64) { + if (outOffset%8 == lOffset%8) && (outOffset%8 == rOffset%8) { + // fastcase! 
+ alignedBitmapOp(op, left, right, lOffset, rOffset, out, outOffset, length) + } else { + unalignedBitmapOp(op, left, right, lOffset, rOffset, out, outOffset, length) + } +} + +func BitmapOpAlloc(mem memory.Allocator, op bitOp, left, right []byte, lOffset, rOffset int64, length int64, outOffset int64) *memory.Buffer { + bits := length + outOffset + buf := memory.NewResizableBuffer(mem) + buf.Resize(int(BytesForBits(bits))) + BitmapOp(op, left, right, lOffset, rOffset, buf.Bytes(), outOffset, length) + return buf +} + +func BitmapAnd(left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { + BitmapOp(bitAndOp, left, right, lOffset, rOffset, out, outOffset, length) +} + +func BitmapOr(left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { + BitmapOp(bitOrOp, left, right, lOffset, rOffset, out, outOffset, length) +} + +func BitmapAndAlloc(mem memory.Allocator, left, right []byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer { + return BitmapOpAlloc(mem, bitAndOp, left, right, lOffset, rOffset, length, outOffset) +} + +func BitmapOrAlloc(mem memory.Allocator, left, right []byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer { + return BitmapOpAlloc(mem, bitOrOp, left, right, lOffset, rOffset, length, outOffset) +} + +func BitmapEquals(left, right []byte, lOffset, rOffset int64, length int64) bool { + if lOffset%8 == 0 && rOffset%8 == 0 { + // byte aligned, fast path, can use bytes.Equal (memcmp) + byteLen := length / 8 + lStart := lOffset / 8 + rStart := rOffset / 8 + if !bytes.Equal(left[lStart:lStart+byteLen], right[rStart:rStart+byteLen]) { + return false + } + + // check trailing bits + for i := (length / 8) * 8; i < length; i++ { + if BitIsSet(left, int(lOffset+i)) != BitIsSet(right, int(rOffset+i)) { + return false + } + } + return true + } + + lrdr := NewBitmapWordReader(left, int(lOffset), int(length)) + rrdr := NewBitmapWordReader(right, int(rOffset), int(length)) + + nwords := lrdr.Words() + for nwords > 0 { + nwords-- + if lrdr.NextWord() != rrdr.NextWord() { + return false + } + } + + nbytes := lrdr.TrailingBytes() + for nbytes > 0 { + nbytes-- + lbt, _ := lrdr.NextTrailingByte() + rbt, _ := rrdr.NextTrailingByte() + if lbt != rbt { + return false + } + } + return true +} diff --git a/go/arrow/bitutil/bitmaps_test.go b/go/arrow/bitutil/bitmaps_test.go index e4d4f8d58a728..0924c351fae6b 100644 --- a/go/arrow/bitutil/bitmaps_test.go +++ b/go/arrow/bitutil/bitmaps_test.go @@ -22,8 +22,10 @@ import ( "strconv" "testing" - "github.com/apache/arrow/go/v9/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" ) func bitmapFromSlice(vals []int, bitOffset int) []byte { @@ -356,3 +358,223 @@ func BenchmarkBitmapReader(b *testing.B) { } }) } + +type ( + noAllocFn func(left, right []byte, lOffset, rOffset int64, out []byte, outOffset, length int64) + allocFn func(mem memory.Allocator, left, right []byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer + bitmapOp struct { + noAlloc noAllocFn + alloc allocFn + } +) + +type BitmapOpSuite struct { + suite.Suite +} + +func (s *BitmapOpSuite) testAligned(op bitmapOp, leftBits, rightBits []int, resultBits []bool) { + var ( + left, right []byte + out *memory.Buffer + length int64 + ) + for _, lOffset := range []int64{0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 65536} { + s.Run(fmt.Sprintf("left offset %d", 
lOffset), func() { + left = bitmapFromSlice(leftBits, int(lOffset)) + length = int64(len(leftBits)) + for _, rOffset := range []int64{lOffset, lOffset + 8, lOffset + 40} { + s.Run(fmt.Sprintf("right offset %d", rOffset), func() { + right = bitmapFromSlice(rightBits, int(rOffset)) + for _, outOffset := range []int64{lOffset, lOffset + 16, lOffset + 24} { + s.Run(fmt.Sprintf("out offset %d", outOffset), func() { + s.Run("zero-length", func() { + out = op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, 0, outOffset) + s.EqualValues(bitutil.BytesForBits(outOffset), out.Len()) + expected := make([]byte, out.Len()) + if out.Len() > 0 { + s.Equal(expected, out.Bytes()) + } else { + s.Nil(out.Bytes()) + } + + memory.Set(out.Bytes(), 0xFF) + op.noAlloc(left, right, lOffset, rOffset, out.Bytes(), outOffset, 0) + if out.Len() > 0 { + memory.Set(expected, 0xFF) + s.Equal(expected, out.Bytes()) + } else { + s.Nil(out.Bytes()) + } + out.Release() + }) + + out = op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, length, outOffset) + defer out.Release() + rdr := bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length)) + assertReaderVals(s.T(), rdr, resultBits) + + memory.Set(out.Bytes(), 0x00) + op.noAlloc(left, right, lOffset, rOffset, out.Bytes(), outOffset, length) + rdr = bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length)) + assertReaderVals(s.T(), rdr, resultBits) + }) + } + }) + } + }) + } +} + +func (s *BitmapOpSuite) testUnaligned(op bitmapOp, leftBits, rightBits []int, resultBits []bool) { + var ( + left, right []byte + out *memory.Buffer + length int64 + offsets = []int64{0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 65536} + ) + + for _, lOffset := range offsets { + s.Run(fmt.Sprintf("left offset %d", lOffset), func() { + left = bitmapFromSlice(leftBits, int(lOffset)) + length = int64(len(leftBits)) + for _, rOffset := range offsets { + s.Run(fmt.Sprintf("right offset %d", rOffset), func() { + right = bitmapFromSlice(rightBits, int(rOffset)) + for _, outOffset := range offsets { + s.Run(fmt.Sprintf("out offset %d", outOffset), func() { + s.Run("zero-length", func() { + out = op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, 0, outOffset) + s.EqualValues(bitutil.BytesForBits(outOffset), out.Len()) + expected := make([]byte, out.Len()) + if out.Len() > 0 { + s.Equal(expected, out.Bytes()) + } else { + s.Nil(out.Bytes()) + } + + memory.Set(out.Bytes(), 0xFF) + op.noAlloc(left, right, lOffset, rOffset, out.Bytes(), outOffset, 0) + if out.Len() > 0 { + memory.Set(expected, 0xFF) + s.Equal(expected, out.Bytes()) + } else { + s.Nil(out.Bytes()) + } + out.Release() + }) + s.Run("alloc", func() { + out = op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, length, outOffset) + rdr := bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length)) + assertReaderVals(s.T(), rdr, resultBits) + }) + s.Run("noalloc", func() { + memory.Set(out.Bytes(), 0x00) + op.noAlloc(left, right, lOffset, rOffset, out.Bytes(), outOffset, length) + rdr := bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length)) + assertReaderVals(s.T(), rdr, resultBits) + }) + }) + } + }) + } + }) + } +} + +func (s *BitmapOpSuite) TestBitmapAnd() { + op := bitmapOp{ + noAlloc: bitutil.BitmapAnd, + alloc: bitutil.BitmapAndAlloc, + } + + leftBits := []int{0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1} + rightBits := []int{0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0} + resultBits := []bool{false, false, true, false, false, false, false, false, false, true, false, false, 
false, false} + + s.Run("aligned", func() { + s.testAligned(op, leftBits, rightBits, resultBits) + }) + s.Run("unaligned", func() { + s.testUnaligned(op, leftBits, rightBits, resultBits) + }) +} + +func (s *BitmapOpSuite) TestBitmapOr() { + op := bitmapOp{ + noAlloc: bitutil.BitmapOr, + alloc: bitutil.BitmapOrAlloc, + } + + leftBits := []int{0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1} + rightBits := []int{0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0} + resultBits := []bool{false, true, true, true, true, true, false, true, true, true, true, true, true, true} + + s.Run("aligned", func() { + s.testAligned(op, leftBits, rightBits, resultBits) + }) + s.Run("unaligned", func() { + s.testUnaligned(op, leftBits, rightBits, resultBits) + }) +} + +func TestBitmapOps(t *testing.T) { + suite.Run(t, new(BitmapOpSuite)) +} + +func TestSmallBitmapOp(t *testing.T) { + // 0b01111111 0b11001111 + left := [2]byte{127, 207} + // 0b11111110 0b01111111 + right := [2]byte{254, 127} + // 0b01111110 0b01001111 + results := [2]byte{126, 79} + + var out [2]byte + bitutil.BitmapAnd(left[:], right[:], 0, 0, out[:], 0, 8) + assert.Equal(t, results[:1], out[:1]) + + bitutil.BitmapAnd(left[:], right[:], 0, 0, out[:], 0, 16) + assert.Equal(t, results, out) +} + +func createRandomBuffer(mem memory.Allocator, src *rand.Rand, nbytes int) []byte { + buf := mem.Allocate(nbytes) + src.Read(buf) + return buf +} + +func benchBitOpImpl(b *testing.B, nBytes, offset int, op noAllocFn) { + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + r := rand.New(rand.NewSource(0)) + + buf1 := createRandomBuffer(mem, r, nBytes) + buf2 := createRandomBuffer(mem, r, nBytes) + buf3 := createRandomBuffer(mem, r, nBytes) + b.Cleanup(func() { + mem.Free(buf1) + mem.Free(buf2) + mem.Free(buf3) + }) + + numBits := nBytes*8 - offset + b.ResetTimer() + b.SetBytes(bitutil.BytesForBits(int64(numBits)) * 2) + for i := 0; i < b.N; i++ { + op(buf1, buf2, 0, int64(offset), buf3, 0, int64(numBits)) + } +} + +func BenchmarkBitmapAnd(b *testing.B) { + sizes := []int{bufferSize * 4, bufferSize * 16} + offsets := []int{0, 1, 2} + + for _, s := range sizes { + b.Run(fmt.Sprintf("nbytes=%d", s), func(b *testing.B) { + for _, o := range offsets { + b.Run(fmt.Sprintf("%d", o), func(b *testing.B) { + benchBitOpImpl(b, s, o, bitutil.BitmapAnd) + }) + } + }) + } +} diff --git a/go/arrow/bitutil/bitutil.go b/go/arrow/bitutil/bitutil.go index 2422dd31ec9ae..36bb02fc70593 100644 --- a/go/arrow/bitutil/bitutil.go +++ b/go/arrow/bitutil/bitutil.go @@ -22,7 +22,7 @@ import ( "reflect" "unsafe" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/memory" ) var ( diff --git a/go/arrow/bitutil/bitutil_test.go b/go/arrow/bitutil/bitutil_test.go index d8f5e1a2fea40..c32b01cddd6a9 100644 --- a/go/arrow/bitutil/bitutil_test.go +++ b/go/arrow/bitutil/bitutil_test.go @@ -21,8 +21,8 @@ import ( "math/rand" "testing" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/internal/testing/tools" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/internal/testing/tools" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go index 5af7ddb7d23bf..9e1f0b2076dbc 100644 --- a/go/arrow/cdata/cdata.go +++ b/go/arrow/cdata/cdata.go @@ -42,10 +42,10 @@ import ( "syscall" "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/bitutil" - 
"github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" "golang.org/x/xerrors" ) @@ -75,7 +75,9 @@ var formatToSimpleType = map[string]arrow.DataType{ "f": arrow.PrimitiveTypes.Float32, "g": arrow.PrimitiveTypes.Float64, "z": arrow.BinaryTypes.Binary, + "Z": arrow.BinaryTypes.LargeBinary, "u": arrow.BinaryTypes.String, + "U": arrow.BinaryTypes.LargeString, "tdD": arrow.FixedWidthTypes.Date32, "tdm": arrow.FixedWidthTypes.Date64, "tts": arrow.FixedWidthTypes.Time32s, @@ -226,6 +228,8 @@ func importSchema(schema *CArrowSchema) (ret arrow.Field, err error) { switch f[1] { case 'l': // list dt = arrow.ListOfField(childFields[0]) + case 'L': // large list + dt = arrow.LargeListOfField(childFields[0]) case 'w': // fixed size list is w:# where # is the list size. listSize, err := strconv.Atoi(strings.Split(f, ":")[1]) if err != nil { @@ -286,6 +290,11 @@ func (imp *cimporter) doImportChildren() error { if err := imp.children[0].importChild(imp, children[0]); err != nil { return err } + case arrow.LARGE_LIST: // only one child to import + imp.children[0].dt = imp.dt.(*arrow.LargeListType).Elem() + if err := imp.children[0].importChild(imp, children[0]); err != nil { + return err + } case arrow.FIXED_SIZE_LIST: // only one child to import imp.children[0].dt = imp.dt.(*arrow.FixedSizeListType).Elem() if err := imp.children[0].importChild(imp, children[0]); err != nil { @@ -354,11 +363,17 @@ func (imp *cimporter) doImport(src *CArrowArray) error { case arrow.FixedWidthDataType: return imp.importFixedSizePrimitive() case *arrow.StringType: - return imp.importStringLike() + return imp.importStringLike(int64(arrow.Int32SizeBytes)) case *arrow.BinaryType: - return imp.importStringLike() + return imp.importStringLike(int64(arrow.Int32SizeBytes)) + case *arrow.LargeStringType: + return imp.importStringLike(int64(arrow.Int64SizeBytes)) + case *arrow.LargeBinaryType: + return imp.importStringLike(int64(arrow.Int64SizeBytes)) case *arrow.ListType: return imp.importListLike() + case *arrow.LargeListType: + return imp.importListLike() case *arrow.MapType: return imp.importListLike() case *arrow.FixedSizeListType: @@ -399,7 +414,7 @@ func (imp *cimporter) doImport(src *CArrowArray) error { return nil } -func (imp *cimporter) importStringLike() error { +func (imp *cimporter) importStringLike(offsetByteWidth int64) error { if err := imp.checkNoChildren(); err != nil { return err } @@ -413,8 +428,17 @@ func (imp *cimporter) importStringLike() error { return err } - offsets := imp.importOffsetsBuffer(1) - values := imp.importVariableValuesBuffer(2, 1, arrow.Int32Traits.CastFromBytes(offsets.Bytes())) + offsets := imp.importOffsetsBuffer(1, offsetByteWidth) + var nvals int64 + switch offsetByteWidth { + case 4: + typedOffsets := arrow.Int32Traits.CastFromBytes(offsets.Bytes()) + nvals = int64(typedOffsets[imp.arr.offset+imp.arr.length]) + case 8: + typedOffsets := arrow.Int64Traits.CastFromBytes(offsets.Bytes()) + nvals = typedOffsets[imp.arr.offset+imp.arr.length] + } + values := imp.importVariableValuesBuffer(2, 1, nvals) imp.data = array.NewData(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, offsets, values}, nil, int(imp.arr.null_count), int(imp.arr.offset)) return nil } @@ -433,7 +457,8 @@ func (imp *cimporter) importListLike() error { return err } - offsets := imp.importOffsetsBuffer(1) + offsetSize := 
imp.dt.Layout().Buffers[1].ByteWidth + offsets := imp.importOffsetsBuffer(1, int64(offsetSize)) imp.data = array.NewData(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, offsets}, []arrow.ArrayData{imp.children[0].data}, int(imp.arr.null_count), int(imp.arr.offset)) return nil } @@ -513,14 +538,13 @@ func (imp *cimporter) importFixedSizeBuffer(bufferID int, byteWidth int64) *memo return imp.importBuffer(bufferID, bufsize) } -func (imp *cimporter) importOffsetsBuffer(bufferID int) *memory.Buffer { - const offsetsize = int64(arrow.Int32SizeBytes) // go doesn't implement int64 offsets yet +func (imp *cimporter) importOffsetsBuffer(bufferID int, offsetsize int64) *memory.Buffer { bufsize := offsetsize * int64((imp.arr.length + imp.arr.offset + 1)) return imp.importBuffer(bufferID, bufsize) } -func (imp *cimporter) importVariableValuesBuffer(bufferID int, byteWidth int, offsets []int32) *memory.Buffer { - bufsize := byteWidth * int(offsets[imp.arr.length]) +func (imp *cimporter) importVariableValuesBuffer(bufferID int, byteWidth, nvals int64) *memory.Buffer { + bufsize := byteWidth * nvals return imp.importBuffer(bufferID, int64(bufsize)) } diff --git a/go/arrow/cdata/cdata_exports.go b/go/arrow/cdata/cdata_exports.go index 21bdf58b2b6b4..a3da68447db22 100644 --- a/go/arrow/cdata/cdata_exports.go +++ b/go/arrow/cdata/cdata_exports.go @@ -39,10 +39,10 @@ import ( "strings" "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/endian" - "github.com/apache/arrow/go/v9/arrow/ipc" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/endian" + "github.com/apache/arrow/go/v10/arrow/ipc" ) func encodeCMetadata(keys, values []string) []byte { @@ -151,8 +151,12 @@ func (exp *schemaExporter) exportFormat(dt arrow.DataType) string { return fmt.Sprintf("d:%d,%d", dt.Precision, dt.Scale) case *arrow.BinaryType: return "z" + case *arrow.LargeBinaryType: + return "Z" case *arrow.StringType: return "u" + case *arrow.LargeStringType: + return "U" case *arrow.Date32Type: return "tdD" case *arrow.Date64Type: @@ -212,6 +216,8 @@ func (exp *schemaExporter) exportFormat(dt arrow.DataType) string { return "tin" case *arrow.ListType: return "+l" + case *arrow.LargeListType: + return "+L" case *arrow.FixedSizeListType: return fmt.Sprintf("+w:%d", dt.Len()) case *arrow.StructType: @@ -236,6 +242,9 @@ func (exp *schemaExporter) export(field arrow.Field) { case *arrow.ListType: exp.children = make([]schemaExporter, 1) exp.children[0].export(dt.ElemField()) + case *arrow.LargeListType: + exp.children = make([]schemaExporter, 1) + exp.children[0].export(dt.ElemField()) case *arrow.StructType: exp.children = make([]schemaExporter, len(dt.Fields())) for i, f := range dt.Fields() { diff --git a/go/arrow/cdata/cdata_test.go b/go/arrow/cdata/cdata_test.go index 6706ddfb3a894..03c01181c13ef 100644 --- a/go/arrow/cdata/cdata_test.go +++ b/go/arrow/cdata/cdata_test.go @@ -31,10 +31,10 @@ import ( "time" "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/decimal128" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) @@ -113,7 +113,9 @@ func TestPrimitiveSchemas(t *testing.T) { 
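Together these cdata hunks add C Data Interface support for the 64-bit-offset types: LargeBinary ("Z"), LargeString ("U") and LargeList ("+L") now round-trip through import and export. A small sketch of where the offset width comes from, using only identifiers the hunk itself relies on:

    // Offsets are read with the width dictated by the type: 32-bit for
    // String/Binary/List, 64-bit for the Large variants. The list path takes the
    // width from the type's buffer layout (buffer 1 is the offsets buffer).
    lt := arrow.ListOf(arrow.PrimitiveTypes.Int8)
    ll := arrow.LargeListOfField(arrow.Field{Name: "item", Type: arrow.PrimitiveTypes.Int8, Nullable: true})
    _ = lt.Layout().Buffers[1].ByteWidth // 4 (int32 offsets)
    _ = ll.Layout().Buffers[1].ByteWidth // 8 (int64 offsets)

    // For the string/binary path, the value-buffer length is the offset stored
    // just past the logical end of the array, read as int32 or int64:
    //   nvals = offsets[arr.offset + arr.length]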
{arrow.PrimitiveTypes.Float64, "g"}, {&arrow.FixedSizeBinaryType{ByteWidth: 3}, "w:3"}, {arrow.BinaryTypes.Binary, "z"}, + {arrow.BinaryTypes.LargeBinary, "Z"}, {arrow.BinaryTypes.String, "u"}, + {arrow.BinaryTypes.LargeString, "U"}, {&arrow.Decimal128Type{Precision: 16, Scale: 4}, "d:16,4"}, {&arrow.Decimal128Type{Precision: 15, Scale: 0}, "d:15,0"}, {&arrow.Decimal128Type{Precision: 15, Scale: -4}, "d:15,-4"}, @@ -397,6 +399,22 @@ func createTestStrArr() arrow.Array { return bld.NewStringArray() } +func createTestLargeBinaryArr() arrow.Array { + bld := array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.LargeBinary) + defer bld.Release() + + bld.AppendValues([][]byte{[]byte("foo"), []byte("bar"), nil}, []bool{true, true, false}) + return bld.NewLargeBinaryArray() +} + +func createTestLargeStrArr() arrow.Array { + bld := array.NewLargeStringBuilder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]string{"foo", "bar", ""}, []bool{true, true, false}) + return bld.NewLargeStringArray() +} + func createTestDecimalArr() arrow.Array { bld := array.NewDecimal128Builder(memory.DefaultAllocator, &arrow.Decimal128Type{Precision: 16, Scale: 4}) defer bld.Release() @@ -425,6 +443,8 @@ func TestPrimitiveArrs(t *testing.T) { {"fixed size binary", createTestFSBArr}, {"binary", createTestBinaryArr}, {"utf8", createTestStrArr}, + {"largebinary", createTestLargeBinaryArr}, + {"largeutf8", createTestLargeStrArr}, {"decimal128", createTestDecimalArr}, } @@ -482,6 +502,23 @@ func createTestListArr() arrow.Array { return bld.NewArray() } +func createTestLargeListArr() arrow.Array { + bld := array.NewLargeListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8) + defer bld.Release() + + vb := bld.ValueBuilder().(*array.Int8Builder) + + bld.Append(true) + vb.AppendValues([]int8{1, 2}, []bool{true, true}) + + bld.Append(true) + vb.AppendValues([]int8{3, 0}, []bool{true, false}) + + bld.AppendNull() + + return bld.NewArray() +} + func createTestFixedSizeList() arrow.Array { bld := array.NewFixedSizeListBuilder(memory.DefaultAllocator, 2, arrow.PrimitiveTypes.Int64) defer bld.Release() @@ -545,6 +582,7 @@ func TestNestedArrays(t *testing.T) { fn func() arrow.Array }{ {"list", createTestListArr}, + {"large list", createTestLargeListArr}, {"fixed size list", createTestFixedSizeList}, {"struct", createTestStructArr}, {"map", createTestMapArr}, diff --git a/go/arrow/cdata/cdata_test_framework.go b/go/arrow/cdata/cdata_test_framework.go index 9cec1263e862d..bb4db1e339be0 100644 --- a/go/arrow/cdata/cdata_test_framework.go +++ b/go/arrow/cdata/cdata_test_framework.go @@ -53,8 +53,8 @@ import "C" import ( "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" ) const ( @@ -208,6 +208,10 @@ func createCArr(arr arrow.Array) *CArrowArray { clist := []*CArrowArray{createCArr(arr.ListValues())} children = (**CArrowArray)(unsafe.Pointer(&clist[0])) nchildren += 1 + case *array.LargeList: + clist := []*CArrowArray{createCArr(arr.ListValues())} + children = (**CArrowArray)(unsafe.Pointer(&clist[0])) + nchildren += 1 case *array.FixedSizeList: clist := []*CArrowArray{createCArr(arr.ListValues())} children = (**CArrowArray)(unsafe.Pointer(&clist[0])) diff --git a/go/arrow/cdata/exports.go b/go/arrow/cdata/exports.go index 36b19058163c7..4ad4b7fac3135 100644 --- a/go/arrow/cdata/exports.go +++ b/go/arrow/cdata/exports.go @@ -22,7 +22,7 @@ import ( "sync/atomic" 
"unsafe" - "github.com/apache/arrow/go/v9/arrow" + "github.com/apache/arrow/go/v10/arrow" ) // #include diff --git a/go/arrow/cdata/interface.go b/go/arrow/cdata/interface.go index 83cdab51138e7..e567ce599a449 100644 --- a/go/arrow/cdata/interface.go +++ b/go/arrow/cdata/interface.go @@ -22,10 +22,10 @@ package cdata import ( "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/arrio" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/arrio" + "github.com/apache/arrow/go/v10/arrow/memory" "golang.org/x/xerrors" ) diff --git a/go/arrow/cdata/test/test_cimport.go b/go/arrow/cdata/test/test_cimport.go index d05e79f9a9ede..38afc1cc9d0c0 100644 --- a/go/arrow/cdata/test/test_cimport.go +++ b/go/arrow/cdata/test/test_cimport.go @@ -23,10 +23,10 @@ import ( "fmt" "runtime" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/cdata" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/cdata" + "github.com/apache/arrow/go/v10/arrow/memory" ) // #include diff --git a/go/arrow/compare.go b/go/arrow/compare.go index c8025c3a88c5b..6cc01bc9a22d7 100644 --- a/go/arrow/compare.go +++ b/go/arrow/compare.go @@ -92,6 +92,32 @@ func TypeEqual(left, right DataType, opts ...TypeEqualOption) bool { } } return true + case UnionType: + r := right.(UnionType) + if l.Mode() != r.Mode() { + return false + } + + if !reflect.DeepEqual(l.ChildIDs(), r.ChildIDs()) { + return false + } + + for i := range l.Fields() { + leftField, rightField := l.Fields()[i], r.Fields()[i] + switch { + case leftField.Name != rightField.Name: + return false + case leftField.Nullable != rightField.Nullable: + return false + case !TypeEqual(leftField.Type, rightField.Type, opts...): + return false + case cfg.metadata && !leftField.Metadata.Equal(rightField.Metadata): + return false + case l.TypeCodes()[i] != r.TypeCodes()[i]: + return false + } + } + return true default: return reflect.DeepEqual(left, right) } diff --git a/go/arrow/compare_test.go b/go/arrow/compare_test.go index e787f55ab2312..43a14daa68602 100644 --- a/go/arrow/compare_test.go +++ b/go/arrow/compare_test.go @@ -44,6 +44,15 @@ func TestTypeEqual(t *testing.T) { { &BinaryType{}, &StringType{}, false, false, }, + { + &LargeBinaryType{}, &LargeStringType{}, false, false, + }, + { + BinaryTypes.LargeBinary, &LargeBinaryType{}, true, false, + }, + { + BinaryTypes.LargeString, &LargeStringType{}, true, false, + }, { &Time32Type{Unit: Second}, &Time32Type{Unit: Second}, true, false, }, diff --git a/go/arrow/compute/datum.go b/go/arrow/compute/datum.go index 0060172b74c27..a73902e55ad97 100644 --- a/go/arrow/compute/datum.go +++ b/go/arrow/compute/datum.go @@ -18,52 +18,39 @@ package compute import ( "fmt" - "strings" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/scalar" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/scalar" ) -//go:generate go run golang.org/x/tools/cmd/stringer -type=ValueShape -linecomment //go:generate go run golang.org/x/tools/cmd/stringer -type=DatumKind -linecomment -// ValueShape is a brief description of the 
shape of a value (array, scalar or otherwise) -type ValueShape int8 - -const ( - // either Array or Scalar - ShapeAny ValueShape = iota // any - ShapeArray // array - ShapeScalar // scalar -) - -// ValueDescr is a descriptor type giving both the shape and the datatype of a value -// but without the data. -type ValueDescr struct { - Shape ValueShape - Type arrow.DataType -} - -func (v *ValueDescr) String() string { - return fmt.Sprintf("%s [%s]", v.Shape, v.Type) -} - // DatumKind is an enum used for denoting which kind of type a datum is encapsulating type DatumKind int const ( - KindNone DatumKind = iota // none - KindScalar // scalar - KindArray // array - KindChunked // chunked_array - KindRecord // record_batch - KindTable // table - KindCollection // collection + KindNone DatumKind = iota // none + KindScalar // scalar + KindArray // array + KindChunked // chunked_array + KindRecord // record_batch + KindTable // table ) const UnknownLength int64 = -1 +// DatumIsValue returns true if the datum passed is a Scalar, Array +// or ChunkedArray type (e.g. it contains a specific value not a +// group of values) +func DatumIsValue(d Datum) bool { + switch d.Kind() { + case KindScalar, KindArray, KindChunked: + return true + } + return false +} + // Datum is a variant interface for wrapping the various Arrow data structures // for now the various Datum types just hold a Value which is the type they // are wrapping, but it might make sense in the future for those types @@ -82,8 +69,6 @@ type Datum interface { // a slice with 1 element for Array, and the slice of chunks for a chunked array. type ArrayLikeDatum interface { Datum - Shape() ValueShape - Descr() ValueDescr NullN() int64 Type() arrow.DataType Chunks() []arrow.Array @@ -114,12 +99,10 @@ type ScalarDatum struct { } func (ScalarDatum) Kind() DatumKind { return KindScalar } -func (ScalarDatum) Shape() ValueShape { return ShapeScalar } func (ScalarDatum) Len() int64 { return 1 } func (ScalarDatum) Chunks() []arrow.Array { return nil } func (d *ScalarDatum) Type() arrow.DataType { return d.Value.DataType() } func (d *ScalarDatum) String() string { return d.Value.String() } -func (d *ScalarDatum) Descr() ValueDescr { return ValueDescr{ShapeScalar, d.Value.DataType()} } func (d *ScalarDatum) ToScalar() (scalar.Scalar, error) { return d.Value, nil } @@ -155,11 +138,9 @@ type ArrayDatum struct { } func (ArrayDatum) Kind() DatumKind { return KindArray } -func (ArrayDatum) Shape() ValueShape { return ShapeArray } func (d *ArrayDatum) Type() arrow.DataType { return d.Value.DataType() } func (d *ArrayDatum) Len() int64 { return int64(d.Value.Len()) } func (d *ArrayDatum) NullN() int64 { return int64(d.Value.NullN()) } -func (d *ArrayDatum) Descr() ValueDescr { return ValueDescr{ShapeArray, d.Value.DataType()} } func (d *ArrayDatum) String() string { return fmt.Sprintf("Array:{%s}", d.Value.DataType()) } func (d *ArrayDatum) MakeArray() arrow.Array { return array.MakeFromData(d.Value) } func (d *ArrayDatum) Chunks() []arrow.Array { return []arrow.Array{d.MakeArray()} } @@ -191,11 +172,9 @@ type ChunkedDatum struct { } func (ChunkedDatum) Kind() DatumKind { return KindChunked } -func (ChunkedDatum) Shape() ValueShape { return ShapeArray } func (d *ChunkedDatum) Type() arrow.DataType { return d.Value.DataType() } func (d *ChunkedDatum) Len() int64 { return int64(d.Value.Len()) } func (d *ChunkedDatum) NullN() int64 { return int64(d.Value.NullN()) } -func (d *ChunkedDatum) Descr() ValueDescr { return ValueDescr{ShapeArray, d.Value.DataType()} } func 
(d *ChunkedDatum) String() string { return fmt.Sprintf("Array:{%s}", d.Value.DataType()) } func (d *ChunkedDatum) Chunks() []arrow.Array { return d.Value.Chunks() } @@ -258,46 +237,6 @@ func (d *TableDatum) Equals(other Datum) bool { } // CollectionDatum is a slice of Datums -type CollectionDatum []Datum - -func (CollectionDatum) Kind() DatumKind { return KindCollection } -func (c CollectionDatum) Len() int64 { return int64(len(c)) } -func (c CollectionDatum) String() string { - var b strings.Builder - b.WriteString("Collection(") - for i, d := range c { - if i > 0 { - b.WriteString(", ") - } - b.WriteString(d.String()) - } - b.WriteByte(')') - return b.String() -} - -func (c CollectionDatum) Release() { - for _, v := range c { - v.Release() - } -} - -func (c CollectionDatum) Equals(other Datum) bool { - rhs, ok := other.(CollectionDatum) - if !ok { - return false - } - - if len(c) != len(rhs) { - return false - } - - for i := range c { - if !c[i].Equals(rhs[i]) { - return false - } - } - return true -} // NewDatum will construct the appropriate Datum type based on what is passed in // as the argument. @@ -318,6 +257,9 @@ func NewDatum(value interface{}) Datum { case arrow.Array: v.Data().Retain() return &ArrayDatum{v.Data().(*array.Data)} + case arrow.ArrayData: + v.Retain() + return &ArrayDatum{v} case *arrow.Chunked: v.Retain() return &ChunkedDatum{v} @@ -327,8 +269,6 @@ func NewDatum(value interface{}) Datum { case arrow.Table: v.Retain() return &TableDatum{v} - case []Datum: - return CollectionDatum(v) case scalar.Scalar: return &ScalarDatum{v} default: @@ -342,5 +282,4 @@ var ( _ ArrayLikeDatum = (*ChunkedDatum)(nil) _ TableLikeDatum = (*RecordDatum)(nil) _ TableLikeDatum = (*TableDatum)(nil) - _ Datum = (CollectionDatum)(nil) ) diff --git a/go/arrow/compute/datumkind_string.go b/go/arrow/compute/datumkind_string.go index 56cef315ac62f..8537c0b7efe31 100644 --- a/go/arrow/compute/datumkind_string.go +++ b/go/arrow/compute/datumkind_string.go @@ -14,12 +14,11 @@ func _() { _ = x[KindChunked-3] _ = x[KindRecord-4] _ = x[KindTable-5] - _ = x[KindCollection-6] } -const _DatumKind_name = "nonescalararraychunked_arrayrecord_batchtablecollection" +const _DatumKind_name = "nonescalararraychunked_arrayrecord_batchtable" -var _DatumKind_index = [...]uint8{0, 4, 10, 15, 28, 40, 45, 55} +var _DatumKind_index = [...]uint8{0, 4, 10, 15, 28, 40, 45} func (i DatumKind) String() string { if i < 0 || i >= DatumKind(len(_DatumKind_index)-1) { diff --git a/go/arrow/compute/doc.go b/go/arrow/compute/doc.go new file mode 100644 index 0000000000000..ee19cd4f965f8 --- /dev/null +++ b/go/arrow/compute/doc.go @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
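With ValueShape and CollectionDatum removed, a Datum now wraps exactly one of Scalar, Array, ChunkedArray, RecordBatch or Table, and DatumIsValue reports whether it holds a concrete value kind. A minimal sketch of the resulting surface (arr is an assumed arrow.Array; the identifiers come from the datum.go hunk):

    d := compute.NewDatum(arr)      // arrow.Array or arrow.ArrayData -> *ArrayDatum (retained)
    defer d.Release()
    s := compute.NewDatum(int32(5)) // plain Go value -> *ScalarDatum

    _ = compute.DatumIsValue(d)           // true: scalar, array and chunked are values
    _ = compute.DatumIsValue(s)           // true
    _ = d.(compute.ArrayLikeDatum).Type() // the wrapped arrow.DataType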
+ +// Package compute is a native-go implementation of an Acero-like +// arrow compute engine. +// +// While consumers of Arrow that are able to use CGO could utilize the +// C Data API (using the cdata package) and could link against the +// acero library directly, there are consumers who cannot use CGO. This +// is an attempt to provide for those users, and in general create a +// native-go arrow compute engine. +// +// Everything in this package should be considered Experimental for now. +package compute + +//go:generate stringer -type=FuncKind -linecomment diff --git a/go/arrow/compute/exec.go b/go/arrow/compute/exec.go new file mode 100644 index 0000000000000..5719ee153da5a --- /dev/null +++ b/go/arrow/compute/exec.go @@ -0,0 +1,165 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package compute + +import ( + "context" + "fmt" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/compute/internal/exec" + "github.com/apache/arrow/go/v10/arrow/internal/debug" +) + +func haveChunkedArray(values []Datum) bool { + for _, v := range values { + if v.Kind() == KindChunked { + return true + } + } + return false +} + +// ExecSpanFromBatch constructs and returns a new ExecSpan from the values +// inside of the ExecBatch which could be scalar or arrays. +// +// This is mostly used for tests but is also a convenience method for other +// cases. +func ExecSpanFromBatch(batch *ExecBatch) *exec.ExecSpan { + out := &exec.ExecSpan{Len: batch.Len, Values: make([]exec.ExecValue, len(batch.Values))} + for i, v := range batch.Values { + outVal := &out.Values[i] + if v.Kind() == KindScalar { + outVal.Scalar = v.(*ScalarDatum).Value + } else { + outVal.Array.SetMembers(v.(*ArrayDatum).Value) + outVal.Scalar = nil + } + } + return out +} + +// this is the primary driver of execution +func execInternal(ctx context.Context, fn Function, opts FunctionOptions, passedLen int64, args ...Datum) (result Datum, err error) { + if opts == nil { + if err = checkOptions(fn, opts); err != nil { + return + } + opts = fn.DefaultOptions() + } + + // we only allow Array, ChunkedArray, and Scalars for now. + // RecordBatch and Table datums are disallowed. 
+ if err = checkAllIsValue(args); err != nil { + return + } + + inTypes := make([]arrow.DataType, len(args)) + for i, a := range args { + inTypes[i] = a.(ArrayLikeDatum).Type() + } + + var ( + k exec.Kernel + executor kernelExecutor + ) + + switch fn.Kind() { + case FuncScalar: + executor = scalarExecPool.Get().(*scalarExecutor) + defer func() { + executor.clear() + scalarExecPool.Put(executor.(*scalarExecutor)) + }() + default: + return nil, fmt.Errorf("%w: direct execution of %s", arrow.ErrNotImplemented, fn.Kind()) + } + + if k, err = fn.DispatchBest(inTypes...); err != nil { + return + } + + kctx := &exec.KernelCtx{Ctx: ctx, Kernel: k} + init := k.GetInitFn() + kinitArgs := exec.KernelInitArgs{Kernel: k, Inputs: inTypes, Options: opts} + if init != nil { + kctx.State, err = init(kctx, kinitArgs) + if err != nil { + return + } + } + + if err = executor.Init(kctx, kinitArgs); err != nil { + return + } + + input := ExecBatch{Values: args, Len: 0} + if input.NumValues() == 0 { + if passedLen != -1 { + input.Len = passedLen + } + } else { + inferred, _ := inferBatchLength(input.Values) + input.Len = inferred + switch fn.Kind() { + case FuncScalar: + if passedLen != -1 && passedLen != inferred { + return nil, fmt.Errorf("%w: passed batch length for execution did not match actual length for scalar fn execution", + arrow.ErrInvalid) + } + } + } + + ectx := GetExecCtx(ctx) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + ch := make(chan Datum, ectx.ExecChannelSize) + go func() { + defer close(ch) + if err = executor.Execute(ctx, &input, ch); err != nil { + cancel() + } + }() + + result = executor.WrapResults(ctx, ch, haveChunkedArray(input.Values)) + debug.Assert(executor.CheckResultType(result) == nil, "invalid result type") + + if ctx.Err() == context.Canceled { + result.Release() + } + + return +} + +// CallFunction is a one-shot invoker for all types of functions. +// +// It will perform kernel-dispatch, argument checking, iteration of +// ChunkedArray inputs and wrapping of outputs. +// +// To affect the execution options, you must call SetExecCtx and pass +// the resulting context in here. +func CallFunction(ctx context.Context, funcName string, opts FunctionOptions, args ...Datum) (Datum, error) { + ectx := GetExecCtx(ctx) + fn, ok := ectx.Registry.GetFunction(funcName) + if !ok { + return nil, fmt.Errorf("%w: function '%s' not found", arrow.ErrKey, funcName) + } + + return fn.Execute(ctx, opts, args...) +} diff --git a/go/arrow/compute/exec_internals_test.go b/go/arrow/compute/exec_internals_test.go new file mode 100644 index 0000000000000..93960dd16b2a8 --- /dev/null +++ b/go/arrow/compute/exec_internals_test.go @@ -0,0 +1,583 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
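CallFunction above is the one-shot entry point: it looks the name up in the registry carried by the context's ExecCtx and hands off to the function's Execute method, which ultimately runs through execInternal. A hedged usage sketch from the caller's side (leftArr and rightArr are assumed int32 arrays; the function name comes from the test files later in this diff):

    ectx := compute.ExecCtx{
        Registry:  compute.GetFunctionRegistry(),
        ChunkSize: compute.DefaultMaxChunkSize,
    }
    ctx := compute.SetExecCtx(context.Background(), ectx)

    out, err := compute.CallFunction(ctx, "test_scalar_add_int32", nil,
        compute.NewDatum(leftArr), compute.NewDatum(rightArr))
    if err != nil {
        return err // e.g. arrow.ErrKey for an unknown name, arrow.ErrInvalid for bad arguments
    }
    defer out.Release()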
+ +package compute + +import ( + "bytes" + "context" + "fmt" + "testing" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/compute/internal/exec" + "github.com/apache/arrow/go/v10/arrow/internal/testing/gen" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/scalar" + "github.com/stretchr/testify/suite" +) + +type ComputeInternalsTestSuite struct { + suite.Suite + + mem *memory.CheckedAllocator + + execCtx ExecCtx + ctx *exec.KernelCtx + rng gen.RandomArrayGenerator +} + +func (c *ComputeInternalsTestSuite) SetupTest() { + c.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) + c.rng = gen.NewRandomArrayGenerator(0, c.mem) + + c.resetCtx() +} + +func (c *ComputeInternalsTestSuite) TearDownTest() { + c.mem.AssertSize(c.T(), 0) +} + +func (c *ComputeInternalsTestSuite) assertArrayEqual(expected, got arrow.Array) { + c.Truef(array.Equal(expected, got), "expected: %s\ngot: %s", expected, got) +} + +func (c *ComputeInternalsTestSuite) assertDatumEqual(expected arrow.Array, got Datum) { + arr := got.(*ArrayDatum).MakeArray() + defer arr.Release() + c.Truef(array.Equal(expected, arr), "expected: %s\ngot: %s", expected, arr) +} + +func (c *ComputeInternalsTestSuite) resetCtx() { + c.execCtx = ExecCtx{Registry: GetFunctionRegistry(), + ChunkSize: DefaultMaxChunkSize, PreallocContiguous: true} + c.ctx = &exec.KernelCtx{Ctx: SetExecCtx(context.Background(), c.execCtx)} +} + +func (c *ComputeInternalsTestSuite) getBoolArr(sz int64, trueprob, nullprob float64) arrow.Array { + return c.rng.Boolean(sz, trueprob, nullprob) +} + +func (c *ComputeInternalsTestSuite) getUint8Arr(sz int64, nullprob float64) arrow.Array { + return c.rng.Uint8(sz, 0, 100, nullprob) +} + +func (c *ComputeInternalsTestSuite) getInt32Arr(sz int64, nullprob float64) arrow.Array { + return c.rng.Int32(sz, 0, 1000, nullprob) +} + +func (c *ComputeInternalsTestSuite) getFloat64Arr(sz int64, nullprob float64) arrow.Array { + return c.rng.Float64(sz, 0, 1000, nullprob) +} + +func (c *ComputeInternalsTestSuite) getInt32Chunked(szs []int64) *arrow.Chunked { + chunks := make([]arrow.Array, 0) + for i, s := range szs { + chunks = append(chunks, c.getInt32Arr(s, 0.1)) + defer chunks[i].Release() + } + return arrow.NewChunked(arrow.PrimitiveTypes.Int32, chunks) +} + +func (c *ComputeInternalsTestSuite) assertValidityZeroExtraBits(data []byte, length, offset int) { + bitExtent := ((offset + length + 7) / 8) * 8 + for i := offset + length; i < bitExtent; i++ { + c.False(bitutil.BitIsSet(data, i)) + } +} + +type PropagateNullsSuite struct { + ComputeInternalsTestSuite +} + +func (p *PropagateNullsSuite) TestUnknownNullCountWithNullsZeroCopies() { + const length int = 16 + bitmap := [8]byte{254, 0, 0, 0, 0, 0, 0, 0} + nulls := memory.NewBufferBytes(bitmap[:]) + + output := array.NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nil, nil}, nil, 0, 0) + input := array.NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nulls, nil}, nil, array.UnknownNullCount, 0) + + var outSpan exec.ArraySpan + outSpan.SetMembers(output) + batch := ExecBatch{Values: []Datum{NewDatum(input)}, Len: int64(length)} + p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(&batch), &outSpan)) + p.Same(nulls, outSpan.Buffers[0].Owner) + p.EqualValues(array.UnknownNullCount, outSpan.Nulls) + p.Equal(9, int(outSpan.Len)-bitutil.CountSetBits(outSpan.Buffers[0].Buf, 
int(outSpan.Offset), int(outSpan.Len))) +} + +func (p *PropagateNullsSuite) TestUnknownNullCountWithoutNulls() { + const length int = 16 + bitmap := [8]byte{255, 255, 0, 0, 0, 0, 0, 0} + nulls := memory.NewBufferBytes(bitmap[:]) + + output := array.NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nil, nil}, nil, 0, 0) + input := array.NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nulls, nil}, nil, array.UnknownNullCount, 0) + + var outSpan exec.ArraySpan + outSpan.SetMembers(output) + batch := ExecBatch{Values: []Datum{NewDatum(input)}, Len: int64(length)} + p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(&batch), &outSpan)) + p.EqualValues(-1, outSpan.Nulls) + p.Same(nulls, outSpan.Buffers[0].Owner) +} + +func (p *PropagateNullsSuite) TestSetAllNulls() { + const length int = 16 + checkSetAll := func(vals []Datum, prealloc bool) { + // fresh bitmap with all 1s + bitmapData := [2]byte{255, 255} + preallocatedMem := memory.NewBufferBytes(bitmapData[:]) + + output := &exec.ArraySpan{ + Type: arrow.FixedWidthTypes.Boolean, + Len: int64(length), + Nulls: array.UnknownNullCount, + } + + if prealloc { + output.Buffers[0].SetBuffer(preallocatedMem) + } + + batch := &ExecBatch{Values: vals, Len: int64(length)} + p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(batch), output)) + + if prealloc { + // ensure that the buffer object is the same when we pass preallocated + // memory to it + p.Same(preallocatedMem, output.Buffers[0].Owner) + } else { + defer output.Buffers[0].Owner.Release() + } + + p.NotNil(output.Buffers[0].Buf) + expected := [2]byte{0, 0} + p.True(bytes.Equal(expected[:], output.Buffers[0].Buf)) + } + + var vals []Datum + const trueProb float64 = 0.5 + p.Run("Null Scalar", func() { + i32Val := scalar.MakeScalar(int32(3)) + vals = []Datum{NewDatum(i32Val), NewDatum(scalar.MakeNullScalar(arrow.FixedWidthTypes.Boolean))} + checkSetAll(vals, true) + checkSetAll(vals, false) + + arr := p.getBoolArr(int64(length), trueProb, 0) + defer arr.Release() + vals[0] = NewDatum(arr) + defer vals[0].Release() + checkSetAll(vals, true) + checkSetAll(vals, false) + }) + + p.Run("one all null", func() { + arrAllNulls := p.getBoolArr(int64(length), trueProb, 1) + defer arrAllNulls.Release() + arrHalf := p.getBoolArr(int64(length), trueProb, 0.5) + defer arrHalf.Release() + vals = []Datum{NewDatum(arrHalf), NewDatum(arrAllNulls)} + defer vals[0].Release() + defer vals[1].Release() + + checkSetAll(vals, true) + checkSetAll(vals, false) + }) + + p.Run("one value is NullType", func() { + nullarr := array.NewNull(length) + arr := p.getBoolArr(int64(length), trueProb, 0) + defer arr.Release() + vals = []Datum{NewDatum(arr), NewDatum(nullarr)} + defer vals[0].Release() + checkSetAll(vals, true) + checkSetAll(vals, false) + }) + + p.Run("Other scenarios", func() { + // an all-null bitmap is zero-copied over, even though + // there is a null-scalar earlier in the batch + outSpan := &exec.ArraySpan{ + Type: arrow.FixedWidthTypes.Boolean, + Len: int64(length), + } + arrAllNulls := p.getBoolArr(int64(length), trueProb, 1) + defer arrAllNulls.Release() + + batch := &ExecBatch{ + Values: []Datum{ + NewDatum(scalar.MakeNullScalar(arrow.FixedWidthTypes.Boolean)), + NewDatum(arrAllNulls), + }, + Len: int64(length), + } + defer batch.Values[1].Release() + + p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(batch), outSpan)) + p.Same(arrAllNulls.Data().Buffers()[0], outSpan.Buffers[0].Owner) + outSpan.Buffers[0].Owner.Release() + }) +} + +func (p *PropagateNullsSuite) 
TestSingleValueWithNulls() { + const length int64 = 100 + arr := p.getBoolArr(length, 0.5, 0.5) + defer arr.Release() + + checkSliced := func(offset int64, prealloc bool, outOffset int64) { + // unaligned bitmap, zero copy not possible + sliced := array.NewSlice(arr, offset, int64(arr.Len())) + defer sliced.Release() + vals := []Datum{NewDatum(sliced)} + defer vals[0].Release() + + output := &exec.ArraySpan{ + Type: arrow.FixedWidthTypes.Boolean, + Len: vals[0].Len(), + Offset: outOffset, + } + + batch := &ExecBatch{Values: vals, Len: vals[0].Len()} + + var preallocatedBitmap *memory.Buffer + if prealloc { + preallocatedBitmap = memory.NewResizableBuffer(p.mem) + preallocatedBitmap.Resize(int(bitutil.BytesForBits(int64(sliced.Len()) + outOffset))) + defer preallocatedBitmap.Release() + output.Buffers[0].SetBuffer(preallocatedBitmap) + output.Buffers[0].SelfAlloc = true + } else { + p.EqualValues(0, output.Offset) + } + + p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(batch), output)) + if !prealloc { + parentBuf := arr.Data().Buffers()[0] + if offset == 0 { + // validity bitmap same, no slice + p.Same(parentBuf, output.Buffers[0].Owner) + } else if offset%8 == 0 { + // validity bitmap sliced + p.NotSame(parentBuf, output.Buffers[0].Owner) + p.Same(parentBuf, output.Buffers[0].Owner.Parent()) + defer output.Buffers[0].Owner.Release() + } else { + // new memory for offset not 0 mod 8 + p.NotSame(parentBuf, output.Buffers[0].Owner) + p.Nil(output.Buffers[0].Owner.Parent()) + defer output.Buffers[0].Owner.Release() + } + } else { + // preallocated, so check that the validity bitmap is unbothered + p.Same(preallocatedBitmap, output.Buffers[0].Owner) + } + + p.EqualValues(sliced.NullN(), output.UpdateNullCount()) + p.True(bitutil.BitmapEquals( + sliced.NullBitmapBytes(), output.Buffers[0].Buf, + int64(sliced.Data().Offset()), output.Offset, output.Len)) + p.assertValidityZeroExtraBits(output.Buffers[0].Buf, int(output.Len), int(output.Offset)) + } + + tests := []struct { + offset, outoffset int64 + prealloc bool + }{ + {8, 0, false}, + {7, 0, false}, + {8, 0, true}, + {7, 0, true}, + {8, 4, true}, + {7, 4, true}, + } + + for _, tt := range tests { + name := fmt.Sprintf("off=%d,prealloc=%t,outoff=%d", tt.offset, tt.prealloc, tt.outoffset) + p.Run(name, func() { + checkSliced(tt.offset, tt.prealloc, tt.outoffset) + }) + } +} + +func (p *PropagateNullsSuite) TestIntersectsNulls() { + const length = 16 + var ( + // 0b01111111 0b11001111 + bitmap1 = [8]byte{127, 207, 0, 0, 0, 0, 0, 0} + // 0b11111110 0b01111111 + bitmap2 = [8]byte{254, 127, 0, 0, 0, 0, 0, 0} + // 0b11101111 0b11111110 + bitmap3 = [8]byte{239, 254, 0, 0, 0, 0, 0, 0} + ) + + arr1 := array.NewData(arrow.FixedWidthTypes.Boolean, length, + []*memory.Buffer{memory.NewBufferBytes(bitmap1[:]), nil}, nil, array.UnknownNullCount, 0) + arr2 := array.NewData(arrow.FixedWidthTypes.Boolean, length, + []*memory.Buffer{memory.NewBufferBytes(bitmap2[:]), nil}, nil, array.UnknownNullCount, 0) + arr3 := array.NewData(arrow.FixedWidthTypes.Boolean, length, + []*memory.Buffer{memory.NewBufferBytes(bitmap3[:]), nil}, nil, array.UnknownNullCount, 0) + + checkCase := func(vals []Datum, exNullCount int, exBitmap []byte, prealloc bool, outoffset int) { + batch := &ExecBatch{Values: vals, Len: length} + + output := &exec.ArraySpan{Type: arrow.FixedWidthTypes.Boolean, Len: length} + + var nulls *memory.Buffer + if prealloc { + // make the buffer one byte bigger so we can have non-zero offsets + nulls = memory.NewResizableBuffer(p.mem) + nulls.Resize(3) + 
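For the single-array case exercised by checkSliced above, propagateNulls reuses the input validity bitmap whenever it can instead of copying it; the assertions pin down exactly when reuse happens:

    // offset == 0        -> the input's bitmap buffer is shared as-is
    // offset % 8 == 0    -> a zero-copy slice is used; its parent is the input buffer
    // offset % 8 != 0    -> the bits are not byte-aligned, so new memory is allocated
    // preallocated out   -> the caller's buffer is always kept, never swapped out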
defer nulls.Release() + output.Buffers[0].SetBuffer(nulls) + output.Buffers[0].SelfAlloc = true + } else { + // non-zero output offset not permitted unless output memory is preallocated + p.Equal(0, outoffset) + } + + output.Offset = int64(outoffset) + + p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(batch), output)) + + // preallocated memory used + if prealloc { + p.Same(nulls, output.Buffers[0].Owner) + } else { + defer output.Buffers[0].Owner.Release() + } + + p.EqualValues(array.UnknownNullCount, output.Nulls) + p.EqualValues(exNullCount, output.UpdateNullCount()) + + p.True(bitutil.BitmapEquals(exBitmap, output.Buffers[0].Buf, 0, output.Offset, length)) + p.assertValidityZeroExtraBits(output.Buffers[0].Buf, int(output.Len), int(output.Offset)) + } + + p.Run("0b01101110 0b01001110", func() { + // 0b01101110 0b01001110 + expected := [2]byte{110, 78} + checkCase([]Datum{NewDatum(arr1), NewDatum(arr2), NewDatum(arr3)}, 7, expected[:], false, 0) + checkCase([]Datum{NewDatum(arr1), NewDatum(arr2), NewDatum(arr3)}, 7, expected[:], true, 0) + checkCase([]Datum{NewDatum(arr1), NewDatum(arr2), NewDatum(arr3)}, 7, expected[:], true, 4) + }) + + p.Run("0b01111110 0b01001111", func() { + expected := [2]byte{126, 79} + checkCase([]Datum{NewDatum(arr1), NewDatum(arr2)}, 5, expected[:], false, 0) + checkCase([]Datum{NewDatum(arr1), NewDatum(arr2)}, 5, expected[:], true, 4) + }) +} + +func TestComputeInternals(t *testing.T) { + suite.Run(t, new(PropagateNullsSuite)) +} + +type ExecSpanItrSuite struct { + ComputeInternalsTestSuite + + iter spanIterator +} + +func (e *ExecSpanItrSuite) setupIterator(batch *ExecBatch, maxChunk int64) { + var err error + _, e.iter, err = iterateExecSpans(batch, maxChunk, true) + e.NoError(err) +} + +func (e *ExecSpanItrSuite) checkIteration(input *ExecBatch, chunksize int, exBatchSizes []int) { + e.setupIterator(input, int64(chunksize)) + var ( + batch exec.ExecSpan + curPos int64 + pos int64 + next bool + ) + + for _, sz := range exBatchSizes { + batch, pos, next = e.iter() + e.True(next) + e.EqualValues(sz, batch.Len) + + for j, val := range input.Values { + switch val := val.(type) { + case *ScalarDatum: + e.Truef(scalar.Equals(batch.Values[j].Scalar, val.Value), "expected: %s\ngot: %s", val.Value, batch.Values[j].Scalar) + case *ArrayDatum: + arr := val.MakeArray() + sl := array.NewSlice(arr, curPos, curPos+batch.Len) + got := batch.Values[j].Array.MakeArray() + + e.Truef(array.Equal(sl, got), "expected: %s\ngot: %s", sl, got) + + got.Release() + arr.Release() + sl.Release() + case *ChunkedDatum: + carr := val.Value + if batch.Len == 0 { + e.Zero(carr.Len()) + } else { + chkd := array.NewChunkedSlice(carr, curPos, curPos+batch.Len) + defer chkd.Release() + e.Len(chkd.Chunks(), 1) + got := batch.Values[j].Array.MakeArray() + defer got.Release() + e.Truef(array.Equal(got, chkd.Chunk(0)), "expected: %s\ngot: %s", chkd.Chunk(0), got) + } + } + } + + curPos += int64(sz) + e.EqualValues(curPos, pos) + } + + batch, pos, next = e.iter() + e.Zero(batch) + e.False(next) + e.EqualValues(input.Len, pos) +} + +func (e *ExecSpanItrSuite) TestBasics() { + const length = 100 + + arr1 := e.getInt32Arr(length, 0.1) + defer arr1.Release() + arr2 := e.getFloat64Arr(length, 0.1) + defer arr2.Release() + + input := &ExecBatch{ + Len: length, + Values: []Datum{NewDatum(arr1), NewDatum(arr2), NewDatum(int32(3))}, + } + defer func() { + for _, v := range input.Values { + v.Release() + } + }() + + e.Run("simple", func() { + e.setupIterator(input, DefaultMaxChunkSize) + + batch, pos, next 
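When several inputs carry nulls, the output validity bitmap is the bitwise AND of all of them, which is exactly what the expected bytes in TestIntersectsNulls above encode:

    // three inputs, first byte:  0b01111111 & 0b11111110 & 0b11101111 == 0b01101110 (110)
    // three inputs, second byte: 0b11001111 & 0b01111111 & 0b11111110 == 0b01001110 (78)
    // two inputs, first byte:    0b01111111 & 0b11111110              == 0b01111110 (126)
    // two inputs, second byte:   0b11001111 & 0b01111111              == 0b01001111 (79)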
:= e.iter() + e.True(next) + e.Len(batch.Values, 3) + e.EqualValues(length, batch.Len) + e.EqualValues(length, pos) + + in1 := input.Values[0].(*ArrayDatum).MakeArray() + defer in1.Release() + in2 := input.Values[1].(*ArrayDatum).MakeArray() + defer in2.Release() + out1 := batch.Values[0].Array.MakeArray() + defer out1.Release() + out2 := batch.Values[1].Array.MakeArray() + defer out2.Release() + + e.Truef(array.Equal(in1, out1), "expected: %s\ngot: %s", in1, out1) + e.Truef(array.Equal(in2, out2), "expected: %s\ngot: %s", in2, out2) + e.True(scalar.Equals(input.Values[2].(*ScalarDatum).Value, batch.Values[2].Scalar), input.Values[2].(*ScalarDatum).Value, batch.Values[2].Scalar) + + _, pos, next = e.iter() + e.EqualValues(length, pos) + e.False(next) + }) + + e.Run("iterations", func() { + e.checkIteration(input, 16, []int{16, 16, 16, 16, 16, 16, 4}) + }) +} + +func (e *ExecSpanItrSuite) TestInputValidation() { + arr1 := e.getInt32Arr(10, 0.1) + defer arr1.Release() + arr2 := e.getInt32Arr(9, 0.1) + defer arr2.Release() + + // length mismatch + batch := &ExecBatch{ + Values: []Datum{&ArrayDatum{arr1.Data()}, &ArrayDatum{arr2.Data()}}, + Len: 10, + } + + _, _, err := iterateExecSpans(batch, DefaultMaxChunkSize, true) + e.ErrorIs(err, arrow.ErrInvalid) + + // swap order of input + batch.Values = []Datum{&ArrayDatum{arr2.Data()}, &ArrayDatum{arr1.Data()}} + + _, _, err = iterateExecSpans(batch, DefaultMaxChunkSize, true) + e.ErrorIs(err, arrow.ErrInvalid) + + batch.Values = []Datum{&ArrayDatum{arr1.Data()}} + _, _, err = iterateExecSpans(batch, DefaultMaxChunkSize, true) + e.NoError(err) +} + +func (e *ExecSpanItrSuite) TestChunkedArrays() { + arr1 := e.getInt32Chunked([]int64{0, 20, 10}) + defer arr1.Release() + arr2 := e.getInt32Chunked([]int64{15, 15}) + defer arr2.Release() + arr3 := e.getInt32Arr(30, 0.1) + defer arr3.Release() + + batch := &ExecBatch{ + Values: []Datum{ + &ChunkedDatum{arr1}, &ChunkedDatum{arr2}, &ArrayDatum{arr3.Data()}, + NewDatum(int32(5)), NewDatum(scalar.MakeNullScalar(arrow.FixedWidthTypes.Boolean))}, + Len: 30, + } + + e.checkIteration(batch, 10, []int{10, 5, 5, 10}) + e.checkIteration(batch, 20, []int{15, 5, 10}) + e.checkIteration(batch, 30, []int{15, 5, 10}) +} + +func (e *ExecSpanItrSuite) TestZeroLengthInput() { + carr := arrow.NewChunked(arrow.PrimitiveTypes.Int32, []arrow.Array{}) + checkArgs := func(batch *ExecBatch) { + _, itr, err := iterateExecSpans(batch, DefaultMaxChunkSize, true) + e.NoError(err) + itrSpan, _, next := itr() + + e.False(next) + e.Zero(itrSpan) + } + + input := &ExecBatch{Len: 0} + + // zero-length chunkedarray with zero chunks + input.Values = []Datum{&ChunkedDatum{carr}} + checkArgs(input) + + // zero-length array + arr := e.getInt32Arr(0, 0.1) + defer arr.Release() + input.Values = []Datum{&ArrayDatum{arr.Data()}} + checkArgs(input) + + // chunkedarray with single empty chunk + carr = e.getInt32Chunked([]int64{0}) + input.Values = []Datum{&ChunkedDatum{carr}} + checkArgs(input) +} + +func TestExecSpanIterator(t *testing.T) { + suite.Run(t, new(ExecSpanItrSuite)) +} diff --git a/go/arrow/compute/exec_test.go b/go/arrow/compute/exec_test.go new file mode 100644 index 0000000000000..df0c67eeffdaf --- /dev/null +++ b/go/arrow/compute/exec_test.go @@ -0,0 +1,377 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
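The span sizes asserted in TestChunkedArrays above appear to fall out of aligning every input's chunk boundaries with the requested maximum chunk size; a sketch of the bookkeeping rather than executable code:

    // arr1 chunk sizes {0, 20, 10} -> boundaries at 0, 20, 30
    // arr2 chunk sizes {15, 15}    -> boundaries at 15, 30
    // max chunk 10: cuts at 10, 15, 20, 30      -> spans {10, 5, 5, 10}
    // max chunk 20 or 30: only chunk boundaries -> spans {15, 5, 10}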
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package compute + +import ( + "strings" + "testing" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/compute/internal/exec" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/scalar" + "github.com/stretchr/testify/suite" +) + +func ExecCopyArray(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { + debug.Assert(len(batch.Values) == 1, "wrong number of values") + valueSize := int64(batch.Values[0].Type().(arrow.FixedWidthDataType).BitWidth() / 8) + + arg0 := batch.Values[0].Array + dst := out.Buffers[1].Buf[out.Offset*valueSize:] + src := arg0.Buffers[1].Buf[arg0.Offset*valueSize:] + copy(dst, src[:batch.Len*valueSize]) + return nil +} + +func ExecComputedBitmap(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { + // propagate nulls not used. check that out bitmap isn't the same already + // as the input bitmap + arg0 := batch.Values[0].Array + if bitutil.CountSetBits(arg0.Buffers[1].Buf, int(arg0.Offset), int(batch.Len)) > 0 { + // check that the bitmap hasn't already been copied + debug.Assert(!bitutil.BitmapEquals(arg0.Buffers[0].Buf, out.Buffers[0].Buf, + arg0.Offset, out.Offset, batch.Len), "bitmap should not have already been copied") + } + + bitutil.CopyBitmap(arg0.Buffers[0].Buf, int(arg0.Offset), int(batch.Len), out.Buffers[0].Buf, int(out.Offset)) + return ExecCopyArray(ctx, batch, out) +} + +func ExecNoPreallocatedData(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { + // validity preallocated, not data + debug.Assert(out.Offset == 0, "invalid offset for non-prealloc") + valueSize := int64(batch.Values[0].Type().(arrow.FixedWidthDataType).BitWidth() / 8) + out.Buffers[1].SetBuffer(ctx.Allocate(int(out.Len * valueSize))) + out.Buffers[1].SelfAlloc = true + return ExecCopyArray(ctx, batch, out) +} + +func ExecNoPreallocatedAnything(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { + // neither validity nor data preallocated + debug.Assert(out.Offset == 0, "invalid offset for non-prealloc") + out.Buffers[0].SetBuffer(ctx.AllocateBitmap(out.Len)) + out.Buffers[0].SelfAlloc = true + arg0 := batch.Values[0].Array + bitutil.CopyBitmap(arg0.Buffers[0].Buf, int(arg0.Offset), int(batch.Len), out.Buffers[0].Buf, 0) + + // reuse kernel that allocates data + return ExecNoPreallocatedData(ctx, batch, out) +} + +type ExampleOptions struct { + Value scalar.Scalar +} + +func (e *ExampleOptions) TypeName() string { return "example" } + +type ExampleState struct { + Value scalar.Scalar +} + +func InitStateful(_ *exec.KernelCtx, args exec.KernelInitArgs) (exec.KernelState, error) { + value := args.Options.(*ExampleOptions).Value + return &ExampleState{Value: value}, nil +} + +func ExecStateful(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) 
error { + state := ctx.State.(*ExampleState) + multiplier := state.Value.(*scalar.Int32).Value + + arg0 := batch.Values[0].Array + arg0Data := exec.GetSpanValues[int32](&arg0, 1) + dst := exec.GetSpanValues[int32](out, 1) + for i, v := range arg0Data { + dst[i] = v * multiplier + } + return nil +} + +func ExecAddInt32(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { + left := exec.GetSpanValues[int32](&batch.Values[0].Array, 1) + right := exec.GetSpanValues[int32](&batch.Values[1].Array, 1) + outValues := exec.GetSpanValues[int32](out, 1) + for i := 0; i < int(batch.Len); i++ { + outValues[i] = left[i] + right[i] + } + return nil +} + +type CallScalarFuncSuite struct { + ComputeInternalsTestSuite +} + +func (c *CallScalarFuncSuite) addCopyFuncs() { + registry = GetFunctionRegistry() + + fn := NewScalarFunction("test_copy", Unary(), EmptyFuncDoc) + types := []arrow.DataType{arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Float64} + for _, t := range types { + c.NoError(fn.AddNewKernel([]exec.InputType{exec.NewExactInput(t)}, + exec.NewOutputType(t), ExecCopyArray, nil)) + } + c.True(registry.AddFunction(fn, false)) + + // a version which doesn't want the executor to call propagatenulls + fn2 := NewScalarFunction("test_copy_computed_bitmap", Unary(), EmptyFuncDoc) + kernel := exec.NewScalarKernel([]exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Uint8)}, + exec.NewOutputType(arrow.PrimitiveTypes.Uint8), ExecComputedBitmap, nil) + kernel.NullHandling = exec.NullComputedPrealloc + c.NoError(fn2.AddKernel(kernel)) + c.True(registry.AddFunction(fn2, false)) +} + +func (c *CallScalarFuncSuite) addNoPreallocFuncs() { + registry = GetFunctionRegistry() + + // a function that allocates its own output memory. 
we have cases + // for both non-preallocated data and non-preallocated bitmap + f1 := NewScalarFunction("test_nopre_data", Unary(), EmptyFuncDoc) + f2 := NewScalarFunction("test_nopre_validity_or_data", Unary(), EmptyFuncDoc) + + kernel := exec.NewScalarKernel( + []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Uint8)}, + exec.NewOutputType(arrow.PrimitiveTypes.Uint8), + ExecNoPreallocatedData, nil) + kernel.MemAlloc = exec.MemNoPrealloc + c.NoError(f1.AddKernel(kernel)) + + kernel.ExecFn = ExecNoPreallocatedAnything + kernel.NullHandling = exec.NullComputedNoPrealloc + c.NoError(f2.AddKernel(kernel)) + + c.True(registry.AddFunction(f1, false)) + c.True(registry.AddFunction(f2, false)) +} + +func (c *CallScalarFuncSuite) addStatefulFunc() { + registry := GetFunctionRegistry() + + // this functions behavior depends on a static parameter that + // is made available to the execution through its options object + fn := NewScalarFunction("test_stateful", Unary(), EmptyFuncDoc) + + c.NoError(fn.AddNewKernel([]exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int32)}, + exec.NewOutputType(arrow.PrimitiveTypes.Int32), ExecStateful, InitStateful)) + + c.True(registry.AddFunction(fn, false)) +} + +func (c *CallScalarFuncSuite) addScalarFunc() { + registry := GetFunctionRegistry() + + fn := NewScalarFunction("test_scalar_add_int32", Binary(), EmptyFuncDoc) + c.NoError(fn.AddNewKernel([]exec.InputType{ + exec.NewExactInput(arrow.PrimitiveTypes.Int32), + exec.NewExactInput(arrow.PrimitiveTypes.Int32)}, + exec.NewOutputType(arrow.PrimitiveTypes.Int32), ExecAddInt32, nil)) + c.True(registry.AddFunction(fn, false)) +} + +func (c *CallScalarFuncSuite) SetupSuite() { + c.addCopyFuncs() + c.addNoPreallocFuncs() + c.addStatefulFunc() + c.addScalarFunc() +} + +func (c *CallScalarFuncSuite) TestArgumentValidation() { + // copy accepts only a single array arg + arr := c.getInt32Arr(10, 0.1) + defer arr.Release() + d1 := &ArrayDatum{Value: arr.Data()} + + c.Run("too many args", func() { + args := []Datum{d1, d1} + _, err := CallFunction(c.ctx.Ctx, "test_copy", nil, args...) 
+ c.ErrorIs(err, arrow.ErrInvalid) + }) + + c.Run("too few args", func() { + _, err := CallFunction(c.ctx.Ctx, "test_copy", nil) + c.ErrorIs(err, arrow.ErrInvalid) + }) + + d1Scalar := NewDatum(int32(5)) + result, err := CallFunction(c.ctx.Ctx, "test_copy", nil, d1) + c.NoError(err) + result.Release() + result, err = CallFunction(c.ctx.Ctx, "test_copy", nil, d1Scalar) + c.NoError(err) + result.Release() +} + +func (c *CallScalarFuncSuite) TestPreallocationCases() { + nullProb := float64(0.2) + arr := c.getUint8Arr(100, nullProb) + defer arr.Release() + + funcNames := []string{"test_copy", "test_copy_computed_bitmap"} + for _, funcName := range funcNames { + c.Run(funcName, func() { + c.resetCtx() + + c.Run("single output default", func() { + result, err := CallFunction(c.ctx.Ctx, funcName, nil, &ArrayDatum{arr.Data()}) + c.NoError(err) + defer result.Release() + c.Equal(KindArray, result.Kind()) + c.assertDatumEqual(arr, result) + }) + + c.Run("exec chunks", func() { + // set the exec_chunksize to be smaller so now we have + // several invocations of the kernel, + // but still only one output array + c.execCtx.ChunkSize = 80 + result, err := CallFunction(SetExecCtx(c.ctx.Ctx, c.execCtx), funcName, nil, &ArrayDatum{arr.Data()}) + c.NoError(err) + defer result.Release() + c.Equal(KindArray, result.Kind()) + c.assertDatumEqual(arr, result) + }) + + c.Run("not multiple 8 chunk", func() { + // chunksize is not a multiple of 8 + c.execCtx.ChunkSize = 11 + result, err := CallFunction(SetExecCtx(c.ctx.Ctx, c.execCtx), funcName, nil, &ArrayDatum{arr.Data()}) + c.NoError(err) + defer result.Release() + c.Equal(KindArray, result.Kind()) + c.assertDatumEqual(arr, result) + }) + + c.Run("chunked", func() { + // input is chunked, output is one big chunk + chk1, chk2 := array.NewSlice(arr, 0, 10), array.NewSlice(arr, 10, int64(arr.Len())) + defer chk1.Release() + defer chk2.Release() + carr := arrow.NewChunked(arr.DataType(), []arrow.Array{chk1, chk2}) + defer carr.Release() + + result, err := CallFunction(SetExecCtx(c.ctx.Ctx, c.execCtx), funcName, nil, &ChunkedDatum{carr}) + c.NoError(err) + defer result.Release() + c.Equal(KindChunked, result.Kind()) + actual := result.(*ChunkedDatum).Value + c.Len(actual.Chunks(), 1) + c.Truef(array.ChunkedEqual(actual, carr), "expected: %s\ngot: %s", carr, actual) + }) + + c.Run("independent", func() { + // preallocate independently for each batch + c.execCtx.PreallocContiguous = false + c.execCtx.ChunkSize = 40 + result, err := CallFunction(SetExecCtx(c.ctx.Ctx, c.execCtx), funcName, nil, &ArrayDatum{arr.Data()}) + c.NoError(err) + defer result.Release() + c.Equal(KindChunked, result.Kind()) + + carr := result.(*ChunkedDatum).Value + c.Len(carr.Chunks(), 3) + sl := array.NewSlice(arr, 0, 40) + defer sl.Release() + c.assertArrayEqual(sl, carr.Chunk(0)) + sl = array.NewSlice(arr, 40, 80) + defer sl.Release() + c.assertArrayEqual(sl, carr.Chunk(1)) + sl = array.NewSlice(arr, 80, int64(arr.Len())) + defer sl.Release() + c.assertArrayEqual(sl, carr.Chunk(2)) + }) + }) + } +} + +func (c *CallScalarFuncSuite) TestBasicNonStandardCases() { + // test some more cases + // + // * validity bitmap computed by kernel rather than propagate nulls + // * data not pre-allocated + // * validity bitmap not pre-allocated + + nullProb := float64(0.2) + arr := c.getUint8Arr(1000, nullProb) + defer arr.Release() + args := []Datum{&ArrayDatum{arr.Data()}} + + for _, funcName := range []string{"test_nopre_data", "test_nopre_validity_or_data"} { + c.Run("funcName", func() { + c.resetCtx() + 
c.Run("single output default", func() { + result, err := CallFunction(c.ctx.Ctx, funcName, nil, args...) + c.NoError(err) + defer result.Release() + c.Equal(KindArray, result.Kind()) + c.assertDatumEqual(arr, result) + }) + + c.Run("split into 3 chunks", func() { + c.execCtx.ChunkSize = 400 + result, err := CallFunction(SetExecCtx(c.ctx.Ctx, c.execCtx), funcName, nil, args...) + c.NoError(err) + defer result.Release() + + c.Equal(KindChunked, result.Kind()) + + carr := result.(*ChunkedDatum).Value + c.Len(carr.Chunks(), 3) + sl := array.NewSlice(arr, 0, 400) + defer sl.Release() + c.assertArrayEqual(sl, carr.Chunk(0)) + sl = array.NewSlice(arr, 400, 800) + defer sl.Release() + c.assertArrayEqual(sl, carr.Chunk(1)) + sl = array.NewSlice(arr, 800, int64(arr.Len())) + defer sl.Release() + c.assertArrayEqual(sl, carr.Chunk(2)) + }) + }) + } +} + +func (c *CallScalarFuncSuite) TestStatefulKernel() { + input, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, 2, 3, null, 5]`)) + defer input.Release() + + multiplier := scalar.MakeScalar(int32(2)) + expected, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[2, 4, 6, null, 10]`)) + defer expected.Release() + + options := &ExampleOptions{multiplier} + result, err := CallFunction(c.ctx.Ctx, "test_stateful", options, &ArrayDatum{input.Data()}) + c.NoError(err) + defer result.Release() + c.assertDatumEqual(expected, result) +} + +func (c *CallScalarFuncSuite) TestScalarFunction() { + args := []Datum{NewDatum(int32(5)), NewDatum(int32(7))} + result, err := CallFunction(c.ctx.Ctx, "test_scalar_add_int32", nil, args...) + c.NoError(err) + defer result.Release() + + c.Equal(KindScalar, result.Kind()) + expected := scalar.MakeScalar(int32(12)) + c.True(scalar.Equals(expected, result.(*ScalarDatum).Value)) +} + +func TestCallScalarFunctions(t *testing.T) { + suite.Run(t, new(CallScalarFuncSuite)) +} diff --git a/go/arrow/compute/executor.go b/go/arrow/compute/executor.go new file mode 100644 index 0000000000000..72f6cf4623b7c --- /dev/null +++ b/go/arrow/compute/executor.go @@ -0,0 +1,802 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package compute + +import ( + "context" + "fmt" + "math" + "sync" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/compute/internal/exec" + "github.com/apache/arrow/go/v10/arrow/internal" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/scalar" +) + +// ExecCtx holds simple contextual information for execution +// such as the default ChunkSize for batch iteration, whether or not +// to ensure contiguous preallocations for kernels that want preallocation, +// and a reference to the desired function registry to use. +// +// An ExecCtx should be placed into a context.Context by using +// SetExecCtx and GetExecCtx to pass it along for execution. +type ExecCtx struct { + ChunkSize int64 + PreallocContiguous bool + Registry FunctionRegistry + ExecChannelSize int +} + +type ctxExecKey struct{} + +const DefaultMaxChunkSize = math.MaxInt64 + +// global default ExecCtx object, initialized with the +// default max chunk size, contiguous preallocations, and +// the default function registry. +var defaultExecCtx ExecCtx + +func init() { + defaultExecCtx.ChunkSize = DefaultMaxChunkSize + defaultExecCtx.PreallocContiguous = true + defaultExecCtx.Registry = GetFunctionRegistry() + defaultExecCtx.ExecChannelSize = 10 +} + +// SetExecCtx returns a new child context containing the passed in ExecCtx +func SetExecCtx(ctx context.Context, e ExecCtx) context.Context { + return context.WithValue(ctx, ctxExecKey{}, e) +} + +// GetExecCtx returns an embedded ExecCtx from the provided context. +// If it does not contain an ExecCtx, then the default one is returned. +func GetExecCtx(ctx context.Context) ExecCtx { + e, ok := ctx.Value(ctxExecKey{}).(ExecCtx) + if ok { + return e + } + return defaultExecCtx +} + +// ExecBatch is a unit of work for kernel execution. It contains a collection +// of Array and Scalar values. +// +// ExecBatch is semantically similar to a RecordBatch but for a SQL-style +// execution context. It represents a collection or records, but constant +// "columns" are represented by Scalar values rather than having to be +// converted into arrays with repeated values. +type ExecBatch struct { + Values []Datum + // Guarantee is a predicate Expression guaranteed to evaluate to true for + // all rows in this batch. + Guarantee Expression + // Len is the semantic length of this ExecBatch. When the values are + // all scalars, the length should be set to 1 for non-aggregate kernels. + // Otherwise the length is taken from the array values. Aggregate kernels + // can have an ExecBatch formed by projecting just the partition columns + // from a batch in which case it would have scalar rows with length > 1 + // + // If the array values are of length 0, then the length is 0 regardless of + // whether any values are Scalar. + Len int64 +} + +func (e ExecBatch) NumValues() int { return len(e.Values) } + +// simple struct for defining how to preallocate a particular buffer. 
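
Editor's sketch (not part of the patch): before the preallocation helpers that follow, a hedged illustration of the ExecCtx mechanism described above: pull the default options with GetExecCtx, adjust ChunkSize, and re-attach them with SetExecCtx before calling a function. The function name "test_copy" is borrowed from the test suite earlier in this diff and stands in for any registered scalar function; the rest of the setup is an assumption.

package main

import (
	"context"
	"fmt"

	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/compute"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func main() {
	bldr := array.NewInt32Builder(memory.DefaultAllocator)
	defer bldr.Release()
	bldr.AppendValues([]int32{1, 2, 3, 4}, nil)
	arr := bldr.NewInt32Array()
	defer arr.Release()

	// Start from the default ExecCtx, lower the chunk size so a large input is
	// processed as several smaller spans, and attach it to the context.
	ectx := compute.GetExecCtx(context.Background())
	ectx.ChunkSize = 1024
	ctx := compute.SetExecCtx(context.Background(), ectx)

	// "test_copy" stands in for any registered scalar function name.
	out, err := compute.CallFunction(ctx, "test_copy", nil, &compute.ArrayDatum{Value: arr.Data()})
	if err != nil {
		fmt.Println(err)
		return
	}
	defer out.Release()
	fmt.Println(out.Kind() == compute.KindArray) // array in, array out
}
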
+type bufferPrealloc struct { + bitWidth int + addLen int +} + +func allocateDataBuffer(ctx *exec.KernelCtx, length, bitWidth int) *memory.Buffer { + switch bitWidth { + case 1: + return ctx.AllocateBitmap(int64(length)) + default: + bufsiz := int(bitutil.BytesForBits(int64(length * bitWidth))) + return ctx.Allocate(bufsiz) + } +} + +func addComputeDataPrealloc(dt arrow.DataType, widths []bufferPrealloc) []bufferPrealloc { + if typ, ok := dt.(arrow.FixedWidthDataType); ok { + return append(widths, bufferPrealloc{bitWidth: typ.BitWidth()}) + } + + switch dt.ID() { + case arrow.BINARY, arrow.STRING, arrow.LIST, arrow.MAP: + return append(widths, bufferPrealloc{bitWidth: 32, addLen: 1}) + case arrow.LARGE_BINARY, arrow.LARGE_STRING, arrow.LARGE_LIST: + return append(widths, bufferPrealloc{bitWidth: 64, addLen: 1}) + } + return widths +} + +// enum to define a generalized assumption of the nulls in the inputs +type nullGeneralization int8 + +const ( + nullGenPerhapsNull nullGeneralization = iota + nullGenAllValid + nullGenAllNull +) + +func getNullGen(val *exec.ExecValue) nullGeneralization { + dtID := val.Type().ID() + switch { + case dtID == arrow.NULL: + return nullGenAllNull + case !internal.DefaultHasValidityBitmap(dtID): + return nullGenAllValid + case val.IsScalar(): + if val.Scalar.IsValid() { + return nullGenAllValid + } + return nullGenAllNull + default: + arr := val.Array + // do not count if they haven't been counted already + if arr.Nulls == 0 || arr.Buffers[0].Buf == nil { + return nullGenAllValid + } + + if arr.Nulls == arr.Len { + return nullGenAllNull + } + } + return nullGenPerhapsNull +} + +func getNullGenDatum(datum Datum) nullGeneralization { + var val exec.ExecValue + switch datum.Kind() { + case KindArray: + val.Array.SetMembers(datum.(*ArrayDatum).Value) + case KindScalar: + val.Scalar = datum.(*ScalarDatum).Value + case KindChunked: + return nullGenPerhapsNull + default: + debug.Assert(false, "should be array, scalar, or chunked!") + return nullGenPerhapsNull + } + return getNullGen(&val) +} + +// populate the validity bitmaps with the intersection of the nullity +// of the arguments. If a preallocated bitmap is not provided, then one +// will be allocated if needed (in some cases a bitmap can be zero-copied +// from the arguments). If any Scalar value is null, then the entire +// validity bitmap will be set to null. +func propagateNulls(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ArraySpan) (err error) { + if out.Type.ID() == arrow.NULL { + // null output type is a no-op (rare but it happens) + return + } + + // this function is ONLY able to write into output with non-zero offset + // when the bitmap is preallocated. 
+ if out.Offset != 0 && out.Buffers[0].Buf == nil { + return fmt.Errorf("%w: can only propagate nulls into pre-allocated memory when output offset is non-zero", arrow.ErrInvalid) + } + + var ( + arrsWithNulls = make([]*exec.ArraySpan, 0) + isAllNull bool + prealloc bool = out.Buffers[0].Buf != nil + ) + + for i := range batch.Values { + v := &batch.Values[i] + nullGen := getNullGen(v) + if nullGen == nullGenAllNull { + isAllNull = true + } + if nullGen != nullGenAllValid && v.IsArray() { + arrsWithNulls = append(arrsWithNulls, &v.Array) + } + } + + outBitmap := out.Buffers[0].Buf + if isAllNull { + // an all-null value gives us a short circuit opportunity + // output should all be null + out.Nulls = out.Len + if prealloc { + bitutil.SetBitsTo(outBitmap, out.Offset, out.Len, false) + return + } + + // walk all the values with nulls instead of breaking on the first + // in case we find a bitmap that can be reused in the non-preallocated case + for _, arr := range arrsWithNulls { + if arr.Nulls == arr.Len && arr.Buffers[0].Owner != nil { + buf := arr.GetBuffer(0) + buf.Retain() + out.Buffers[0].Buf = buf.Bytes() + out.Buffers[0].Owner = buf + return + } + } + + buf := ctx.AllocateBitmap(int64(out.Len)) + out.Buffers[0].Owner = buf + out.Buffers[0].Buf = buf.Bytes() + out.Buffers[0].SelfAlloc = true + bitutil.SetBitsTo(out.Buffers[0].Buf, out.Offset, out.Len, false) + return + } + + out.Nulls = array.UnknownNullCount + switch len(arrsWithNulls) { + case 0: + out.Nulls = 0 + if prealloc { + bitutil.SetBitsTo(outBitmap, out.Offset, out.Len, true) + } + case 1: + arr := arrsWithNulls[0] + out.Nulls = arr.Nulls + if prealloc { + bitutil.CopyBitmap(arr.Buffers[0].Buf, int(arr.Offset), int(arr.Len), outBitmap, int(out.Offset)) + return + } + + switch { + case arr.Offset == 0: + out.Buffers[0] = arr.Buffers[0] + out.Buffers[0].Owner.Retain() + case arr.Offset%8 == 0: + buf := memory.SliceBuffer(arr.GetBuffer(0), int(arr.Offset)/8, int(bitutil.BytesForBits(arr.Len))) + out.Buffers[0].Buf = buf.Bytes() + out.Buffers[0].Owner = buf + default: + buf := ctx.AllocateBitmap(int64(out.Len)) + out.Buffers[0].Owner = buf + out.Buffers[0].Buf = buf.Bytes() + out.Buffers[0].SelfAlloc = true + bitutil.CopyBitmap(arr.Buffers[0].Buf, int(arr.Offset), int(arr.Len), out.Buffers[0].Buf, 0) + } + return + + default: + if !prealloc { + buf := ctx.AllocateBitmap(int64(out.Len)) + out.Buffers[0].Owner = buf + out.Buffers[0].Buf = buf.Bytes() + out.Buffers[0].SelfAlloc = true + outBitmap = out.Buffers[0].Buf + } + + acc := func(left, right *exec.ArraySpan) { + debug.Assert(left.Buffers[0].Buf != nil, "invalid intersection for null propagation") + debug.Assert(right.Buffers[0].Buf != nil, "invalid intersection for null propagation") + bitutil.BitmapAnd(left.Buffers[0].Buf, right.Buffers[0].Buf, left.Offset, right.Offset, outBitmap, out.Offset, out.Len) + } + + acc(arrsWithNulls[0], arrsWithNulls[1]) + for _, arr := range arrsWithNulls[2:] { + acc(out, arr) + } + } + return +} + +func inferBatchLength(values []Datum) (length int64, allSame bool) { + length, allSame = -1, true + areAllScalar := true + for _, arg := range values { + switch arg := arg.(type) { + case *ArrayDatum: + argLength := arg.Len() + if length < 0 { + length = argLength + } else { + if length != argLength { + allSame = false + return + } + } + areAllScalar = false + case *ChunkedDatum: + argLength := arg.Len() + if length < 0 { + length = argLength + } else { + if length != argLength { + allSame = false + return + } + } + areAllScalar = false + } + } + + 
if areAllScalar && len(values) > 0 { + length = 1 + } else if length < 0 { + length = 0 + } + allSame = true + return +} + +// kernelExecutor is the interface for all executors to initialize and +// call kernel execution functions on batches. +type kernelExecutor interface { + // Init must be called *after* the kernel's init method and any + // KernelState must be set into the KernelCtx *before* calling + // this Init method. This is to faciliate the case where + // Init may be expensive and does not need to be called + // again for each execution of the kernel. For example, + // the same lookup table can be re-used for all scanned batches + // in a dataset filter. + Init(*exec.KernelCtx, exec.KernelInitArgs) error + // Execute the kernel for the provided batch and pass the resulting + // Datum values to the provided channel. + Execute(context.Context, *ExecBatch, chan<- Datum) error + // WrapResults exists for the case where an executor wants to post process + // the batches of result datums. Such as creating a ChunkedArray from + // multiple output batches or so on. Results from individual batch + // executions should be read from the out channel, and WrapResults should + // return the final Datum result. + WrapResults(ctx context.Context, out <-chan Datum, chunkedArgs bool) Datum + // CheckResultType checks the actual result type against the resolved + // output type. If the types don't match an error is returned + CheckResultType(out Datum) error + + clear() +} + +// the base implementation for executing non-aggregate kernels. +type nonAggExecImpl struct { + ctx *exec.KernelCtx + ectx ExecCtx + kernel exec.NonAggKernel + outType arrow.DataType + numOutBuf int + dataPrealloc []bufferPrealloc + preallocValidity bool +} + +func (e *nonAggExecImpl) clear() { + e.ctx, e.kernel, e.outType = nil, nil, nil + if e.dataPrealloc != nil { + e.dataPrealloc = e.dataPrealloc[:0] + } +} + +func (e *nonAggExecImpl) Init(ctx *exec.KernelCtx, args exec.KernelInitArgs) (err error) { + e.ctx, e.kernel = ctx, args.Kernel.(exec.NonAggKernel) + e.outType, err = e.kernel.GetSig().OutType.Resolve(ctx, args.Inputs) + e.ectx = GetExecCtx(ctx.Ctx) + return +} + +func (e *nonAggExecImpl) prepareOutput(length int) *exec.ExecResult { + var nullCount int = array.UnknownNullCount + + if e.kernel.GetNullHandling() == exec.NullNoOutput { + nullCount = 0 + } + + output := &exec.ArraySpan{ + Type: e.outType, + Len: int64(length), + Nulls: int64(nullCount), + } + + if e.preallocValidity { + buf := e.ctx.AllocateBitmap(int64(length)) + output.Buffers[0].Owner = buf + output.Buffers[0].Buf = buf.Bytes() + output.Buffers[0].SelfAlloc = true + } + + for i, pre := range e.dataPrealloc { + if pre.bitWidth >= 0 { + buf := allocateDataBuffer(e.ctx, length+pre.addLen, pre.bitWidth) + output.Buffers[i+1].Owner = buf + output.Buffers[i+1].Buf = buf.Bytes() + output.Buffers[i+1].SelfAlloc = true + } + } + + return output +} + +func (e *nonAggExecImpl) CheckResultType(out Datum) error { + typ := out.(ArrayLikeDatum).Type() + if typ != nil && !arrow.TypeEqual(e.outType, typ) { + return fmt.Errorf("%w: kernel type result mismatch: declared as %s, actual is %s", + arrow.ErrType, e.outType, typ) + } + return nil +} + +type spanIterator func() (exec.ExecSpan, int64, bool) + +type scalarExecutor struct { + nonAggExecImpl + + elideValidityBitmap bool + preallocAllBufs bool + preallocContiguous bool + allScalars bool + iter spanIterator + iterLen int64 +} + +func (s *scalarExecutor) Execute(ctx context.Context, batch *ExecBatch, data chan<- 
Datum) (err error) { + s.allScalars, s.iter, err = iterateExecSpans(batch, s.ectx.ChunkSize, true) + if err != nil { + return + } + + s.iterLen = batch.Len + + if batch.Len == 0 { + result := array.MakeArrayOfNull(exec.GetAllocator(s.ctx.Ctx), s.outType, 0) + defer result.Release() + out := &exec.ArraySpan{} + out.SetMembers(result.Data()) + return s.emitResult(out, data) + } + + if err = s.setupPrealloc(batch.Len, batch.Values); err != nil { + return + } + + return s.executeSpans(data) +} + +func (s *scalarExecutor) WrapResults(ctx context.Context, out <-chan Datum, hasChunked bool) Datum { + var ( + output Datum + acc []arrow.Array + ) + + toChunked := func() { + acc = output.(ArrayLikeDatum).Chunks() + output.Release() + output = nil + } + + // get first output + select { + case <-ctx.Done(): + return nil + case output = <-out: + // if the inputs contained at least one chunked array + // then we want to return chunked output + if hasChunked { + toChunked() + } + } + + for { + select { + case <-ctx.Done(): + // context is done, either cancelled or a timeout. + // either way, we end early and return what we've got so far. + return output + case o, ok := <-out: + if !ok { // channel closed, wrap it up + if output != nil { + return output + } + + for _, c := range acc { + defer c.Release() + } + + chkd := arrow.NewChunked(s.outType, acc) + defer chkd.Release() + return NewDatum(chkd) + } + + // if we get multiple batches of output, then we need + // to return it as a chunked array. + if acc == nil { + toChunked() + } + + defer o.Release() + if o.Len() == 0 { // skip any empty batches + continue + } + + acc = append(acc, o.(*ArrayDatum).MakeArray()) + } + } +} + +func (s *scalarExecutor) executeSpans(data chan<- Datum) (err error) { + var ( + input exec.ExecSpan + output exec.ExecResult + next bool + ) + + if s.preallocContiguous { + // make one big output alloc + prealloc := s.prepareOutput(int(s.iterLen)) + output = *prealloc + + output.Offset = 0 + var resultOffset int64 + var nextOffset int64 + for err == nil { + if input, nextOffset, next = s.iter(); !next { + break + } + output.SetSlice(resultOffset, input.Len) + err = s.executeSingleSpan(&input, &output) + resultOffset = nextOffset + } + if err != nil { + return + } + + return s.emitResult(prealloc, data) + } + + // fully preallocating, but not contiguously + // we (maybe) preallocate only for the output of processing + // the current chunk + for err == nil { + if input, _, next = s.iter(); !next { + break + } + + output = *s.prepareOutput(int(input.Len)) + if err = s.executeSingleSpan(&input, &output); err != nil { + return + } + err = s.emitResult(&output, data) + } + + return +} + +func (s *scalarExecutor) executeSingleSpan(input *exec.ExecSpan, out *exec.ExecResult) error { + switch { + case out.Type.ID() == arrow.NULL: + out.Nulls = out.Len + case s.kernel.GetNullHandling() == exec.NullIntersection: + if !s.elideValidityBitmap { + propagateNulls(s.ctx, input, out) + } + case s.kernel.GetNullHandling() == exec.NullNoOutput: + out.Nulls = 0 + } + return s.kernel.Exec(s.ctx, input, out) +} + +func (s *scalarExecutor) setupPrealloc(totalLen int64, args []Datum) error { + s.numOutBuf = len(s.outType.Layout().Buffers) + outTypeID := s.outType.ID() + // default to no validity pre-allocation for the following cases: + // - Output Array is NullArray + // - kernel.NullHandling is ComputeNoPrealloc or OutputNotNull + s.preallocValidity = false + + if outTypeID != arrow.NULL { + switch s.kernel.GetNullHandling() { + case 
exec.NullComputedPrealloc: + s.preallocValidity = true + case exec.NullIntersection: + s.elideValidityBitmap = true + for _, a := range args { + nullGen := getNullGenDatum(a) == nullGenAllValid + s.elideValidityBitmap = s.elideValidityBitmap && nullGen + } + s.preallocValidity = !s.elideValidityBitmap + case exec.NullNoOutput: + s.elideValidityBitmap = true + } + } + + if s.kernel.GetMemAlloc() == exec.MemPrealloc { + s.dataPrealloc = addComputeDataPrealloc(s.outType, s.dataPrealloc) + } + + // validity bitmap either preallocated or elided, and all data buffers allocated + // this is basically only true for primitive types that are not dict-encoded + s.preallocAllBufs = + ((s.preallocValidity || s.elideValidityBitmap) && len(s.dataPrealloc) == (s.numOutBuf-1) && + !arrow.IsNested(outTypeID) && outTypeID != arrow.DICTIONARY) + + // contiguous prealloc only possible on non-nested types if all + // buffers are preallocated. otherwise we have to go chunk by chunk + // + // some kernels are also unable to write into sliced outputs, so + // we respect the kernel's attributes + s.preallocContiguous = + (s.ectx.PreallocContiguous && s.kernel.CanFillSlices() && + s.preallocAllBufs) + + return nil +} + +func (s *scalarExecutor) emitResult(resultData *exec.ArraySpan, data chan<- Datum) error { + var output Datum + if s.allScalars { + // we boxed scalar inputs as ArraySpan so now we have to unbox the output + arr := resultData.MakeArray() + defer arr.Release() + sc, err := scalar.GetScalar(arr, 0) + if err != nil { + return err + } + output = NewDatum(sc) + } else { + d := resultData.MakeData() + defer d.Release() + output = NewDatum(d) + } + data <- output + return nil +} + +func checkAllIsValue(vals []Datum) error { + for _, v := range vals { + if !DatumIsValue(v) { + return fmt.Errorf("%w: tried executing function with non-value type: %s", + arrow.ErrInvalid, v) + } + } + return nil +} + +func checkIfAllScalar(batch *ExecBatch) bool { + for _, v := range batch.Values { + if v.Kind() != KindScalar { + return false + } + } + return batch.NumValues() > 0 +} + +// iterateExecSpans sets up and returns a function which can iterate a batch +// according to the chunk sizes. If the inputs contain chunked arrays, then +// we will find the min(chunk sizes, maxChunkSize) to ensure we return +// contiguous spans to execute on. +// +// the iteration function returns the next span to execute on, the current +// position in the full batch, and a boolean indicating whether or not +// a span was actually returned (there is data to process). 
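
Editor's sketch (not part of the patch): the unexported function defined just below produces this iterator, so the sketch has to sit inside package compute. It mirrors the consumption pattern that executeSpans above uses; the helper name exampleDriveSpans is hypothetical.

func exampleDriveSpans(batch *ExecBatch, chunkSize int64) error {
	// The first result reports whether every argument was a Scalar (and, with
	// promoteIfAllScalar=true, promoted to length-1 spans); ignored here.
	_, next, err := iterateExecSpans(batch, chunkSize, true)
	if err != nil {
		return err
	}
	for {
		span, pos, ok := next()
		if !ok {
			return nil // iterator exhausted: every row has been handed out
		}
		// span is the next contiguous exec.ExecSpan to run a kernel on; pos is
		// the batch position just after it, so span covers [pos-span.Len, pos).
		_, _ = span, pos
	}
}
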
+func iterateExecSpans(batch *ExecBatch, maxChunkSize int64, promoteIfAllScalar bool) (haveAllScalars bool, itr spanIterator, err error) { + if batch.NumValues() > 0 { + inferred, allArgsSame := inferBatchLength(batch.Values) + if inferred != batch.Len { + return false, nil, fmt.Errorf("%w: value lengths differed from execbatch length", arrow.ErrInvalid) + } + if !allArgsSame { + return false, nil, fmt.Errorf("%w: array args must all be the same length", arrow.ErrInvalid) + } + } + + var ( + args []Datum = batch.Values + haveChunked bool + chunkIdxes = make([]int, len(args)) + valuePositions = make([]int64, len(args)) + valueOffsets = make([]int64, len(args)) + pos, length int64 = 0, batch.Len + ) + haveAllScalars = checkIfAllScalar(batch) + maxChunkSize = exec.Min(length, maxChunkSize) + + span := exec.ExecSpan{Values: make([]exec.ExecValue, len(args)), Len: 0} + for i, a := range args { + switch arg := a.(type) { + case *ScalarDatum: + span.Values[i].Scalar = arg.Value + case *ArrayDatum: + span.Values[i].Array.SetMembers(arg.Value) + valueOffsets[i] = int64(arg.Value.Offset()) + case *ChunkedDatum: + // populate from first chunk + carr := arg.Value + if len(carr.Chunks()) > 0 { + arr := carr.Chunk(0).Data() + span.Values[i].Array.SetMembers(arr) + valueOffsets[i] = int64(arr.Offset()) + } else { + // fill as zero len + exec.FillZeroLength(carr.DataType(), &span.Values[i].Array) + } + haveChunked = true + } + } + + if haveAllScalars && promoteIfAllScalar { + exec.PromoteExecSpanScalars(span) + } + + nextChunkSpan := func(iterSz int64, span exec.ExecSpan) int64 { + for i := 0; i < len(args) && iterSz > 0; i++ { + // if the argument is not chunked, it's either a scalar or an array + // in which case it doesn't influence the size of the span + chunkedArg, ok := args[i].(*ChunkedDatum) + if !ok { + continue + } + + arg := chunkedArg.Value + if len(arg.Chunks()) == 0 { + iterSz = 0 + continue + } + + var curChunk arrow.Array + for { + curChunk = arg.Chunk(chunkIdxes[i]) + if valuePositions[i] == int64(curChunk.Len()) { + // chunk is zero-length, or was exhausted in the previous + // iteration, move to next chunk + chunkIdxes[i]++ + curChunk = arg.Chunk(chunkIdxes[i]) + span.Values[i].Array.SetMembers(curChunk.Data()) + valuePositions[i] = 0 + valueOffsets[i] = int64(curChunk.Data().Offset()) + continue + } + break + } + iterSz = exec.Min(int64(curChunk.Len())-valuePositions[i], iterSz) + } + return iterSz + } + + return haveAllScalars, func() (exec.ExecSpan, int64, bool) { + if pos == length { + return exec.ExecSpan{}, pos, false + } + + iterationSize := exec.Min(length-pos, maxChunkSize) + if haveChunked { + iterationSize = nextChunkSpan(iterationSize, span) + } + + span.Len = iterationSize + for i, a := range args { + if a.Kind() != KindScalar { + span.Values[i].Array.SetSlice(valuePositions[i]+valueOffsets[i], iterationSize) + valuePositions[i] += iterationSize + } + } + + pos += iterationSize + debug.Assert(pos <= length, "bad state for iteration exec span") + return span, pos, true + }, nil +} + +var ( + // have a pool of scalar executors to avoid excessive object creation + scalarExecPool = sync.Pool{ + New: func() any { return &scalarExecutor{} }, + } +) diff --git a/go/arrow/compute/expression.go b/go/arrow/compute/expression.go index 9f3a6a14d0fd1..e5bd118dab6b7 100644 --- a/go/arrow/compute/expression.go +++ b/go/arrow/compute/expression.go @@ -27,12 +27,13 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - 
"github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/ipc" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/arrow/scalar" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/compute/internal/exec" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/ipc" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/scalar" ) var hashSeed = maphash.MakeSeed() @@ -58,9 +59,6 @@ type Expression interface { // FieldRef returns a pointer to the underlying field reference, or nil if // this expression is not a field reference. FieldRef() *FieldRef - // Descr returns the shape of this expression will evaluate to including the type - // and whether it will be an Array, Scalar, or either. - Descr() ValueDescr // Type returns the datatype this expression will evaluate to. Type() arrow.DataType @@ -146,14 +144,6 @@ func (l *Literal) IsSatisfiable() bool { return true } -func (l *Literal) Descr() ValueDescr { - if ad, ok := l.Literal.(ArrayLikeDatum); ok { - return ad.Descr() - } - - return ValueDescr{ShapeAny, nil} -} - func (l *Literal) Hash() uint64 { if l.IsScalarExpr() { return scalar.Hash(hashSeed, l.Literal.(*ScalarDatum).Value) @@ -183,7 +173,7 @@ type Parameter struct { ref *FieldRef // post bind props - descr ValueDescr + dt arrow.DataType index int bound boundRef @@ -191,12 +181,11 @@ type Parameter struct { func (Parameter) IsNullLiteral() bool { return false } func (p *Parameter) boundExpr() boundRef { return p.bound } -func (p *Parameter) Type() arrow.DataType { return p.descr.Type } +func (p *Parameter) Type() arrow.DataType { return p.dt } func (p *Parameter) IsBound() bool { return p.Type() != nil } func (p *Parameter) IsScalarExpr() bool { return p.ref != nil } func (p *Parameter) IsSatisfiable() bool { return p.Type() == nil || p.Type().ID() != arrow.NULL } func (p *Parameter) FieldRef() *FieldRef { return p.ref } -func (p *Parameter) Descr() ValueDescr { return p.descr } func (p *Parameter) Hash() uint64 { return p.ref.Hash(hashSeed) } func (p *Parameter) String() string { @@ -219,7 +208,7 @@ func (p *Parameter) Equals(other Expression) bool { } func (p *Parameter) Bind(ctx context.Context, mem memory.Allocator, schema *arrow.Schema) (Expression, error) { - bound, descr, index, _, err := bindExprSchema(ctx, mem, p, schema) + bound, dt, index, _, err := bindExprSchema(ctx, mem, p, schema) if err != nil { return nil, err } @@ -227,7 +216,7 @@ func (p *Parameter) Bind(ctx context.Context, mem memory.Allocator, schema *arro return &Parameter{ ref: p.ref, index: index, - descr: descr, + dt: dt, bound: bound, }, nil } @@ -325,7 +314,7 @@ func optionsToString(fn FunctionOptions) string { type Call struct { funcName string args []Expression - descr ValueDescr + dt arrow.DataType options FunctionOptions cachedHash uint64 @@ -335,8 +324,7 @@ type Call struct { func (c *Call) boundExpr() boundRef { return c.bound } func (c *Call) IsNullLiteral() bool { return false } func (c *Call) FieldRef() *FieldRef { return nil } -func (c *Call) Descr() ValueDescr { return c.descr } -func (c *Call) Type() arrow.DataType { return c.descr.Type } +func (c *Call) Type() arrow.DataType { return c.dt } func (c *Call) IsSatisfiable() bool { return c.Type() == nil || c.Type().ID() != arrow.NULL } func (c *Call) String() string { @@ -388,7 +376,7 @@ func (c *Call) Hash() uint64 { 
h.WriteString(c.funcName) c.cachedHash = h.Sum64() for _, arg := range c.args { - c.cachedHash = hashCombine(c.cachedHash, arg.Hash()) + c.cachedHash = exec.HashCombine(c.cachedHash, arg.Hash()) } return c.cachedHash } @@ -463,6 +451,10 @@ type FunctionOptionsEqual interface { Equals(FunctionOptions) bool } +type FunctionOptionsCloneable interface { + Clone() FunctionOptions +} + type MakeStructOptions struct { FieldNames []string `compute:"field_names"` FieldNullability []bool `compute:"field_nullability"` diff --git a/go/arrow/compute/expression_test.go b/go/arrow/compute/expression_test.go index 00cfd969c7921..50c255c2d7dff 100644 --- a/go/arrow/compute/expression_test.go +++ b/go/arrow/compute/expression_test.go @@ -20,11 +20,11 @@ package compute_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/compute" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/arrow/scalar" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/compute" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/scalar" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/compute/fieldref.go b/go/arrow/compute/fieldref.go index badae37257295..23dd3102d4cda 100644 --- a/go/arrow/compute/fieldref.go +++ b/go/arrow/compute/fieldref.go @@ -27,8 +27,8 @@ import ( "unicode" "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" ) var ( diff --git a/go/arrow/compute/fieldref_test.go b/go/arrow/compute/fieldref_test.go index 3ef27f77d204c..cfada32ca0ceb 100644 --- a/go/arrow/compute/fieldref_test.go +++ b/go/arrow/compute/fieldref_test.go @@ -19,10 +19,10 @@ package compute_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/compute" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/compute" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/compute/funckind_string.go b/go/arrow/compute/funckind_string.go new file mode 100644 index 0000000000000..97d3eaa031387 --- /dev/null +++ b/go/arrow/compute/funckind_string.go @@ -0,0 +1,27 @@ +// Code generated by "stringer -type=FuncKind -linecomment"; DO NOT EDIT. + +package compute + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[FuncScalar-0] + _ = x[FuncVector-1] + _ = x[FuncScalarAgg-2] + _ = x[FuncHashAgg-3] + _ = x[FuncMeta-4] +} + +const _FuncKind_name = "ScalarVectorScalarAggregateHashAggregateMeta" + +var _FuncKind_index = [...]uint8{0, 6, 12, 27, 40, 44} + +func (i FuncKind) String() string { + if i < 0 || i >= FuncKind(len(_FuncKind_index)-1) { + return "FuncKind(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _FuncKind_name[_FuncKind_index[i]:_FuncKind_index[i+1]] +} diff --git a/go/arrow/compute/functions.go b/go/arrow/compute/functions.go new file mode 100644 index 0000000000000..c7fd2827b0701 --- /dev/null +++ b/go/arrow/compute/functions.go @@ -0,0 +1,308 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package compute + +import ( + "context" + "fmt" + "strings" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/compute/internal/exec" +) + +type Function interface { + Name() string + Kind() FuncKind + Arity() Arity + Doc() FunctionDoc + NumKernels() int + Execute(context.Context, FunctionOptions, ...Datum) (Datum, error) + DispatchExact(...arrow.DataType) (exec.Kernel, error) + DispatchBest(...arrow.DataType) (exec.Kernel, error) + DefaultOptions() FunctionOptions + Validate() error +} + +// Arity defines the number of required arguments for a function. +// +// Naming conventions are taken from https://en.wikipedia.org/wiki/Arity +type Arity struct { + NArgs int + IsVarArgs bool +} + +// Convenience functions to generating Arities + +func Nullary() Arity { return Arity{0, false} } +func Unary() Arity { return Arity{1, false} } +func Binary() Arity { return Arity{2, false} } +func Ternary() Arity { return Arity{3, false} } +func VarArgs(minArgs int) Arity { return Arity{minArgs, true} } + +type FunctionDoc struct { + // A one-line summary of the function, using a verb. + // + // For example, "Add two numeric arrays or scalars" + Summary string + // A detailed description of the function, meant to follow the summary. + Description string + // Symbolic names (identifiers) for the function arguments. + // + // Can be used to generate nicer function signatures. + ArgNames []string + // Name of the options struct type, if any + OptionsType string + // Whether or not options are required for function execution. + // + // If false, then either there are no options for this function, + // or there is a usable default options value. + OptionsRequired bool +} + +// EmptyFuncDoc is a reusable empty function doc definition for convenience. +var EmptyFuncDoc FunctionDoc + +// FuncKind is an enum representing the type of a function +type FuncKind int8 + +const ( + // A function that performs scalar data operations on whole arrays + // of data. Can generally process Array or Scalar values. 
The size + // of the output will be the same as the size (or broadcasted size, + // in the case of mixing Array and Scalar inputs) of the input. + FuncScalar FuncKind = iota // Scalar + // A function with array input and output whose behavior depends on + // the values of the entire arrays passed, rather than the value of + // each scalar value. + FuncVector // Vector + // A function that computes a scalar summary statistic from array input. + FuncScalarAgg // ScalarAggregate + // A function that computes grouped summary statistics from array + // input and an array of group identifiers. + FuncHashAgg // HashAggregate + // A function that dispatches to other functions and does not contain + // its own kernels. + FuncMeta // Meta +) + +func validateFunctionSummary(summary string) error { + if strings.Contains(summary, "\n") { + return fmt.Errorf("%w: summary contains a newline", arrow.ErrInvalid) + } + if summary[len(summary)-1] == '.' { + return fmt.Errorf("%w: summary ends with a point", arrow.ErrInvalid) + } + return nil +} + +func validateFunctionDescription(desc string) error { + if len(desc) != 0 && desc[len(desc)-1] == '\n' { + return fmt.Errorf("%w: description ends with a newline", arrow.ErrInvalid) + } + + const maxLineSize = 78 + for _, ln := range strings.Split(desc, "\n") { + if len(ln) > maxLineSize { + return fmt.Errorf("%w: description line length exceeds %d characters", arrow.ErrInvalid, maxLineSize) + } + } + return nil +} + +// baseFunction is the base class for compute functions. Function +// implementations should embed this baseFunction and will contain +// a collection of "kernels" which are implementations of the function +// for specific argument types. Selecting a viable kernel for +// executing the function is referred to as "dispatching". +type baseFunction struct { + name string + kind FuncKind + arity Arity + doc FunctionDoc + defaultOpts FunctionOptions +} + +func (b *baseFunction) Name() string { return b.name } +func (b *baseFunction) Kind() FuncKind { return b.kind } +func (b *baseFunction) Arity() Arity { return b.arity } +func (b *baseFunction) Doc() FunctionDoc { return b.doc } +func (b *baseFunction) DefaultOptions() FunctionOptions { return b.defaultOpts } +func (b *baseFunction) Validate() error { + if b.doc.Summary == "" { + return nil + } + + argCount := len(b.doc.ArgNames) + if argCount != b.arity.NArgs && !(b.arity.IsVarArgs && argCount == b.arity.NArgs+1) { + return fmt.Errorf("in function '%s': number of argument names for function doc != function arity", b.name) + } + + if err := validateFunctionSummary(b.doc.Summary); err != nil { + return err + } + return validateFunctionDescription(b.doc.Description) +} + +func checkOptions(fn Function, opts FunctionOptions) error { + if opts == nil && fn.Doc().OptionsRequired { + return fmt.Errorf("%w: function '%s' cannot be called without options", arrow.ErrInvalid, fn.Name()) + } + return nil +} + +func (b *baseFunction) checkArity(nargs int) error { + switch { + case b.arity.IsVarArgs && nargs < b.arity.NArgs: + return fmt.Errorf("%w: varargs function '%s' needs at least %d arguments, but only %d passed", + arrow.ErrInvalid, b.name, b.arity.NArgs, nargs) + case !b.arity.IsVarArgs && nargs != b.arity.NArgs: + return fmt.Errorf("%w: function '%s' accepts %d arguments but %d passed", + arrow.ErrInvalid, b.name, b.arity.NArgs, nargs) + } + return nil +} + +// kernelType is a type contstraint interface that is used for funcImpl +// generic definitions. 
It will be extended as other kernel types +// are defined. +// +// Currently only ScalarKernels are allowed to be used. +type kernelType interface { + exec.ScalarKernel + + // specifying the Kernel interface here allows us to utilize + // the methods of the Kernel interface on the generic + // constrained type + exec.Kernel +} + +// funcImpl is the basic implementation for any functions that use kernels +// i.e. all except for Meta functions. +type funcImpl[KT kernelType] struct { + baseFunction + + kernels []KT +} + +func (fi *funcImpl[KT]) DispatchExact(vals ...arrow.DataType) (*KT, error) { + if err := fi.checkArity(len(vals)); err != nil { + return nil, err + } + + for i := range fi.kernels { + if fi.kernels[i].GetSig().MatchesInputs(vals) { + return &fi.kernels[i], nil + } + } + + return nil, fmt.Errorf("%w: function '%s' has no kernel matching input types %s", + arrow.ErrNotImplemented, fi.name, arrow.TypesToString(vals)) +} + +func (fi *funcImpl[KT]) NumKernels() int { return len(fi.kernels) } +func (fi *funcImpl[KT]) Kernels() []*KT { + res := make([]*KT, len(fi.kernels)) + for i := range fi.kernels { + res[i] = &fi.kernels[i] + } + return res +} + +// A ScalarFunction is a function that executes element-wise operations +// on arrays or scalars, and therefore whose results generally do not +// depent on the order of the values in the arguments. Accepts and returns +// arrays that are all of the same size. These functions roughly correspond +// to the functions used in most SQL expressions. +type ScalarFunction struct { + funcImpl[exec.ScalarKernel] +} + +// NewScalarFunction constructs a new ScalarFunction object with the passed in +// name, arity and function doc. +func NewScalarFunction(name string, arity Arity, doc FunctionDoc) *ScalarFunction { + return &ScalarFunction{ + funcImpl: funcImpl[exec.ScalarKernel]{ + baseFunction: baseFunction{ + name: name, + arity: arity, + doc: doc, + kind: FuncScalar, + }, + }, + } +} + +func (s *ScalarFunction) SetDefaultOptions(opts FunctionOptions) { + s.defaultOpts = opts +} + +func (s *ScalarFunction) DispatchExact(vals ...arrow.DataType) (exec.Kernel, error) { + return s.funcImpl.DispatchExact(vals...) +} + +func (s *ScalarFunction) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error) { + return s.DispatchExact(vals...) +} + +// AddNewKernel constructs a new kernel with the provided signature +// and execution/init functions and then adds it to the function's list of +// kernels. This assumes default null handling (intersection of validity bitmaps) +func (s *ScalarFunction) AddNewKernel(inTypes []exec.InputType, outType exec.OutputType, execFn exec.ArrayKernelExec, init exec.KernelInitFn) error { + if err := s.checkArity(len(inTypes)); err != nil { + return err + } + + if s.arity.IsVarArgs && len(inTypes) != 1 { + return fmt.Errorf("%w: varargs signatures must have exactly one input type", arrow.ErrInvalid) + } + + sig := &exec.KernelSignature{ + InputTypes: inTypes, + OutType: outType, + IsVarArgs: s.arity.IsVarArgs, + } + + s.kernels = append(s.kernels, exec.NewScalarKernelWithSig(sig, execFn, init)) + return nil +} + +// AddKernel adds the provided kernel to the list of kernels +// this function has. A copy of the kernel is added to the slice of kernels, +// which means that a given kernel object can be created, added and then +// reused to add other kernels. 
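
Editor's sketch (not part of the patch): AddKernel itself follows below. Before it, a hedged illustration of how the pieces above fit together when registering a new function (a FunctionDoc that passes Validate, NewScalarFunction, AddNewKernel, then the registry), mirroring the test helpers earlier in this diff. It assumes package compute and this package's existing arrow, exec and fmt imports; the name "example_negate", its doc text, and its kernel body are hypothetical.

func execNegateInt32(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error {
	in := exec.GetSpanValues[int32](&batch.Values[0].Array, 1)
	res := exec.GetSpanValues[int32](out, 1)
	for i, v := range in {
		res[i] = -v
	}
	return nil
}

func registerExampleNegate() error {
	// The doc must satisfy Validate: a one-line Summary with no trailing
	// period and one ArgName per declared argument.
	doc := FunctionDoc{
		Summary:  "Negate a 32-bit integer array or scalar",
		ArgNames: []string{"x"},
	}
	fn := NewScalarFunction("example_negate", Unary(), doc)
	if err := fn.Validate(); err != nil {
		return err
	}
	// Default null handling (intersection of validity bitmaps) and default
	// preallocation are fine for this element-wise kernel.
	if err := fn.AddNewKernel(
		[]exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int32)},
		exec.NewOutputType(arrow.PrimitiveTypes.Int32),
		execNegateInt32, nil); err != nil {
		return err
	}
	if !GetFunctionRegistry().AddFunction(fn, false) {
		return fmt.Errorf("%w: function example_negate already registered", arrow.ErrInvalid)
	}
	return nil
}

After registration, a CallFunction invocation with an int32 argument would dispatch to this kernel through DispatchExact.
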
+func (s *ScalarFunction) AddKernel(k exec.ScalarKernel) error { + if err := s.checkArity(len(k.Signature.InputTypes)); err != nil { + return err + } + + if s.arity.IsVarArgs && !k.Signature.IsVarArgs { + return fmt.Errorf("%w: function accepts varargs but kernel signature does not", arrow.ErrInvalid) + } + + s.kernels = append(s.kernels, k) + return nil +} + +// Execute uses the passed in context, function options and arguments to eagerly +// execute the function using kernel dispatch, batch iteration and memory +// allocation details as defined by the kernel. +// +// If opts is nil, then the DefaultOptions() will be used. +func (s *ScalarFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) { + return execInternal(ctx, s, opts, -1, args...) +} diff --git a/go/arrow/compute/functions_test.go b/go/arrow/compute/functions_test.go new file mode 100644 index 0000000000000..78dbd8be5e4f1 --- /dev/null +++ b/go/arrow/compute/functions_test.go @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package compute_test + +import ( + "testing" + + "github.com/apache/arrow/go/v10/arrow/compute" + "github.com/stretchr/testify/assert" +) + +func TestArityBasics(t *testing.T) { + nullary := compute.Nullary() + assert.Equal(t, 0, nullary.NArgs) + assert.False(t, nullary.IsVarArgs) + + unary := compute.Unary() + assert.Equal(t, 1, unary.NArgs) + assert.False(t, unary.IsVarArgs) + + binary := compute.Binary() + assert.Equal(t, 2, binary.NArgs) + assert.False(t, binary.IsVarArgs) + + ternary := compute.Ternary() + assert.Equal(t, 3, ternary.NArgs) + assert.False(t, ternary.IsVarArgs) + + varargs := compute.VarArgs(2) + assert.Equal(t, 2, varargs.NArgs) + assert.True(t, varargs.IsVarArgs) +} diff --git a/go/arrow/compute/go.mod b/go/arrow/compute/go.mod new file mode 100644 index 0000000000000..09559d7a7a2a0 --- /dev/null +++ b/go/arrow/compute/go.mod @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +module github.com/apache/arrow/go/v10/arrow/compute + +go 1.18 + +replace github.com/apache/arrow/go/v10 => ../../ + +require ( + github.com/apache/arrow/go/v10 v10.0.0-00010101000000-000000000000 + github.com/stretchr/testify v1.8.0 + golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e + golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f +) + +require ( + github.com/andybalholm/brotli v1.0.4 // indirect + github.com/apache/thrift v0.16.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/goccy/go-json v0.9.10 // indirect + github.com/golang/snappy v0.0.4 // indirect + github.com/google/flatbuffers v2.0.6+incompatible // indirect + github.com/klauspost/asmfmt v1.3.2 // indirect + github.com/klauspost/compress v1.15.9 // indirect + github.com/klauspost/cpuid/v2 v2.0.9 // indirect + github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect + github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect + github.com/pierrec/lz4/v4 v4.1.15 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/zeebo/xxh3 v1.0.2 // indirect + golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect + golang.org/x/sys v0.0.0-20220808155132-1c4a2a72c664 // indirect + golang.org/x/tools v0.1.12 // indirect + gonum.org/v1/gonum v0.11.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/go/arrow/compute/go.sum b/go/arrow/compute/go.sum new file mode 100644 index 0000000000000..b05bdd419c7c4 --- /dev/null +++ b/go/arrow/compute/go.sum @@ -0,0 +1,320 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= +gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8= +git.sr.ht/~sbinet/gg v0.3.1/go.mod h1:KGYtlADtqsqANL9ueOFkWymvzUvLMQllU5Ixo+8v3pc= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU= +github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk= +github.com/ajstarks/deck v0.0.0-20200831202436-30c9fc6549a9/go.mod h1:JynElWSGnm/4RlzPXRlREEwqTHAN3T56Bv2ITsFT3gY= +github.com/ajstarks/deck/generate v0.0.0-20210309230005-c3f852c02e19/go.mod h1:T13YZdzov6OU0A1+RfKZiZN9ca6VeKdBdyDV+BY97Tk= +github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= +github.com/ajstarks/svgo v0.0.0-20211024235047-1546f124cd8b/go.mod h1:1KcenG0jGWcpt8ov532z81sp/kMMUG485J2InIOyADM= +github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= +github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= +github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= +github.com/apache/thrift v0.16.0 h1:qEy6UW60iVOlUy+b9ZR0d5WzUWYGOo4HfopoyBaNmoY= +github.com/apache/thrift v0.16.0/go.mod h1:PHK3hniurgQaNMZYaCLEqXKsYK8upmhPbmdP2FXSqgU= +github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= +github.com/boombuler/barcode v1.0.1/go.mod 
h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= +github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= +github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= +github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= +github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= +github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= +github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= +github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/go-fonts/dejavu v0.1.0/go.mod h1:4Wt4I4OU2Nq9asgDCteaAaWZOV24E+0/Pwo0gppep4g= +github.com/go-fonts/latin-modern v0.2.0/go.mod h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3T0ecnM9pNujks= +github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY= +github.com/go-fonts/liberation v0.2.0/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY= +github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY= +github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= +github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U= +github.com/go-latex/latex v0.0.0-20210823091927-c0d11ff05a81/go.mod h1:SX0U8uGpxhq9o2S/CELCSUxEWWAuoCUcVCQWv7G2OCk= +github.com/go-pdf/fpdf v0.5.0/go.mod h1:HzcnA+A23uwogo0tp9yU+l3V+KXhiESpt1PMayhOh5M= +github.com/go-pdf/fpdf v0.6.0/go.mod h1:HzcnA+A23uwogo0tp9yU+l3V+KXhiESpt1PMayhOh5M= +github.com/goccy/go-json v0.9.10 
h1:hCeNmprSNLB8B8vQKWl6DpuH0t60oEs+TAk9a7CScKc= +github.com/goccy/go-json v0.9.10/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/flatbuffers v2.0.6+incompatible h1:XHFReMv7nFFusa+CEokzWbzaYocKXI6C7hdU5Kgh9Lw= +github.com/google/flatbuffers v2.0.6+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= +github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= +github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= +github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= 
+github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= +github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= +github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY= +github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= +github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= +github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/mattn/go-sqlite3 v1.14.12/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= +github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= +github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= +github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= +github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= +github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY= +github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= +github.com/phpdave11/gofpdi v1.0.13/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= +github.com/pierrec/lz4/v4 v4.1.15 h1:MO0/ucJhngq7299dKLwIMtgTfbkoSPF6AoMYDd8Q4q0= +github.com/pierrec/lz4/v4 v4.1.15/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= +github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w= +github.com/ruudk/golang-pdf417 v0.0.0-20201230142125-a7e3863a1245/go.mod h1:pQAZKsJ8yyVxGRWYNEm9oFB8ieLgKFnamEyDmSA0BRk= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 
+github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= +github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= +github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= +github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= +go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3/go.mod h1:NOZ3BPKG0ec/BKJQgnvsSFpcKLM5xXVWnvZS97DWHgE= +golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e h1:+WEEuIdZHnUeJJmEUjyYC2gfUMj69yZXw17EnHg/otA= +golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e/go.mod h1:Kr81I6Kryrl9sr8s2FK3vxD90NdsKWRuOIl2O4CvYbA= +golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= +golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= +golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20190910094157-69e4b8554b2a/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20200119044424-58c23975cae1/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20200430140353-33d19683fad8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20200618115811-c13761719519/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20201208152932-35266b937fa6/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20210216034530-4410531fe030/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= 
+golang.org/x/image v0.0.0-20210607152325-775e3b0c77b9/go.mod h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM= +golang.org/x/image v0.0.0-20210628002857-a66eb6448b8d/go.mod h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM= +golang.org/x/image v0.0.0-20211028202545-6944b10bf410/go.mod h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM= +golang.org/x/image v0.0.0-20220302094943-723b81ca9867/go.mod h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= +golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.5.1/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= +golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 h1:6zppjxzCulZykYSLyVDYbneBfbaBIQPYMevg0bEwv2s= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync 
v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210304124612-50617c2ba197/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220808155132-1c4a2a72c664 h1:v1W7bwXHsnLLloWYTVEdvGvA7BHMeBYsPcF0GLDxIRs= +golang.org/x/sys v0.0.0-20220808155132-1c4a2a72c664/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools 
v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190927191325-030b2cf1153e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20201124115921-2c860bdd6e78/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= +golang.org/x/tools v0.1.9/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= +golang.org/x/tools v0.1.10/go.mod h1:Uh6Zz+xoGYZom868N8YTex3t7RhtHDBrE8Gzo9bV56E= +golang.org/x/tools v0.1.12 h1:VveCTK38A2rkS8ZqFY25HIDFscX5X9OoEhJd3quQmXU= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f h1:uF6paiQQebLeSXkrTqHqz0MXhXXS1KgF41eUdBNvxK0= +golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= +gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= +gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0= +gonum.org/v1/gonum v0.9.3/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0= +gonum.org/v1/gonum v0.11.0 h1:f1IJhK4Km5tBJmaiJXtk/PkL4cdVX6J+tGiM187uT5E= +gonum.org/v1/gonum v0.11.0/go.mod h1:fSG4YDCxxUZQJ7rKsQrj0gMOg00Il0Z96/qMA4bVQhA= +gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= +gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= +gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY= +gonum.org/v1/plot v0.10.1/go.mod h1:VZW5OlhkL1mysU9vaqNHnsy86inf6Ot+jB3r+BczCEo= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= +google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= +google.golang.org/grpc v1.48.0/go.mod 
h1:vN9eftEi1UMyUsIF80+uQXhHjbXYbm0uXoFCACuMGWk= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.1.3/go.mod h1:NgwopIslSNH47DimFoV78dnkksY2EFtX0ajyb3K/las= +lukechampine.com/uint128 v1.1.1/go.mod h1:c4eWIwlEGaxC/+H1VguhU4PHXNWDCDMUlWdIWl2j1gk= +lukechampine.com/uint128 v1.2.0/go.mod h1:c4eWIwlEGaxC/+H1VguhU4PHXNWDCDMUlWdIWl2j1gk= +modernc.org/cc/v3 v3.36.0/go.mod h1:NFUHyPn4ekoC/JHeZFfZurN6ixxawE1BnVonP/oahEI= +modernc.org/cc/v3 v3.36.1/go.mod h1:NFUHyPn4ekoC/JHeZFfZurN6ixxawE1BnVonP/oahEI= +modernc.org/ccgo/v3 v3.0.0-20220428102840-41399a37e894/go.mod h1:eI31LL8EwEBKPpNpA4bU1/i+sKOwOrQy8D87zWUcRZc= +modernc.org/ccgo/v3 v3.0.0-20220430103911-bc99d88307be/go.mod h1:bwdAnOoaIt8Ax9YdWGjxWsdkPcZyRPHqrOvJxaKAKGw= +modernc.org/ccgo/v3 v3.16.4/go.mod h1:tGtX0gE9Jn7hdZFeU88slbTh1UtCYKusWOoCJuvkWsQ= +modernc.org/ccgo/v3 v3.16.6/go.mod h1:tGtX0gE9Jn7hdZFeU88slbTh1UtCYKusWOoCJuvkWsQ= +modernc.org/ccgo/v3 v3.16.8/go.mod h1:zNjwkizS+fIFDrDjIAgBSCLkWbJuHF+ar3QRn+Z9aws= +modernc.org/ccorpus v1.11.6/go.mod h1:2gEUTrWqdpH2pXsmTM1ZkjeSrUWDpjMu2T6m29L/ErQ= +modernc.org/httpfs v1.0.6/go.mod h1:7dosgurJGp0sPaRanU53W4xZYKh14wfzX420oZADeHM= +modernc.org/libc v0.0.0-20220428101251-2d5f3daf273b/go.mod h1:p7Mg4+koNjc8jkqwcoFBJx7tXkpj00G77X7A72jXPXA= +modernc.org/libc v1.16.0/go.mod 
h1:N4LD6DBE9cf+Dzf9buBlzVJndKr/iJHG97vGLHYnb5A= +modernc.org/libc v1.16.1/go.mod h1:JjJE0eu4yeK7tab2n4S1w8tlWd9MxXLRzheaRnAKymU= +modernc.org/libc v1.16.7/go.mod h1:hYIV5VZczAmGZAnG15Vdngn5HSF5cSkbvfz2B7GRuVU= +modernc.org/libc v1.16.17/go.mod h1:hYIV5VZczAmGZAnG15Vdngn5HSF5cSkbvfz2B7GRuVU= +modernc.org/libc v1.16.19/go.mod h1:p7Mg4+koNjc8jkqwcoFBJx7tXkpj00G77X7A72jXPXA= +modernc.org/mathutil v1.2.2/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= +modernc.org/mathutil v1.4.1/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= +modernc.org/memory v1.1.1/go.mod h1:/0wo5ibyrQiaoUoH7f9D8dnglAmILJ5/cxZlRECf+Nw= +modernc.org/opt v0.1.1/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0= +modernc.org/opt v0.1.3/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0= +modernc.org/sqlite v1.18.0/go.mod h1:B9fRWZacNxJBHoCJZQr1R54zhVn3fjfl0aszflrTSxY= +modernc.org/strutil v1.1.1/go.mod h1:DE+MQQ/hjKBZS2zNInV5hhcipt5rLPWkmpbGeW5mmdw= +modernc.org/strutil v1.1.2/go.mod h1:OYajnUAcI/MX+XD/Wx7v1bbdvcQSvxgtb0gC+u3d3eg= +modernc.org/tcl v1.13.1/go.mod h1:XOLfOwzhkljL4itZkK6T72ckMgvj0BDsnKNdZVUOecw= +modernc.org/token v1.0.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= +modernc.org/z v1.5.1/go.mod h1:eWFB510QWW5Th9YGZT81s+LwvaAs3Q2yr4sP0rmLkv8= +rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= diff --git a/go/arrow/compute/hash_util.go b/go/arrow/compute/internal/exec/hash_util.go similarity index 88% rename from go/arrow/compute/hash_util.go rename to go/arrow/compute/internal/exec/hash_util.go index d0ecca5bc4385..7630c7495f2bc 100644 --- a/go/arrow/compute/hash_util.go +++ b/go/arrow/compute/internal/exec/hash_util.go @@ -14,11 +14,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -package compute +package exec + +import "hash/maphash" + +var hashSeed = maphash.MakeSeed() // ADAPTED FROM HASH UTILITIES FOR BOOST -func hashCombine(seed, value uint64) uint64 { +func HashCombine(seed, value uint64) uint64 { seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2) return seed } diff --git a/go/arrow/compute/internal/exec/kernel.go b/go/arrow/compute/internal/exec/kernel.go new file mode 100644 index 0000000000000..8716db656bb24 --- /dev/null +++ b/go/arrow/compute/internal/exec/kernel.go @@ -0,0 +1,587 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package exec + +import ( + "context" + "fmt" + "hash/maphash" + "strings" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" + "golang.org/x/exp/slices" +) + +type ctxAllocKey struct{} + +// WithAllocator returns a new context with the provided allocator +// embedded into the context. +func WithAllocator(ctx context.Context, mem memory.Allocator) context.Context { + return context.WithValue(ctx, ctxAllocKey{}, mem) +} + +// GetAllocator retrieves the allocator from the context, or returns +// memory.DefaultAllocator if there was no allocator in the provided +// context. +func GetAllocator(ctx context.Context) memory.Allocator { + mem, ok := ctx.Value(ctxAllocKey{}).(memory.Allocator) + if !ok { + return memory.DefaultAllocator + } + return mem +} + +// Kernel defines the minimum interface required for the basic execution +// kernel. It will grow as the implementation requires. +type Kernel interface { + GetInitFn() KernelInitFn + GetSig() *KernelSignature +} + +// NonAggKernel builds on the base Kernel interface for +// non aggregate execution kernels. Specifically this will +// represent Scalar and Vector kernels. +type NonAggKernel interface { + Kernel + Exec(*KernelCtx, *ExecSpan, *ExecResult) error + GetNullHandling() NullHandling + GetMemAlloc() MemAlloc + CanFillSlices() bool +} + +// KernelCtx is a small struct holding the context for a kernel execution +// consisting of a pointer to the kernel, initialized state (if needed) +// and the context for this execution. +type KernelCtx struct { + Ctx context.Context + Kernel Kernel + State KernelState +} + +func (k *KernelCtx) Allocate(bufsize int) *memory.Buffer { + buf := memory.NewResizableBuffer(GetAllocator(k.Ctx)) + buf.Resize(bufsize) + return buf +} + +func (k *KernelCtx) AllocateBitmap(nbits int64) *memory.Buffer { + nbytes := bitutil.BytesForBits(nbits) + return k.Allocate(int(nbytes)) +} + +// TypeMatcher define an interface for matching Input or Output types +// for execution kernels. There are multiple implementations of this +// interface provided by this package. +type TypeMatcher interface { + fmt.Stringer + Matches(typ arrow.DataType) bool + Equals(other TypeMatcher) bool +} + +type sameTypeIDMatcher struct { + accepted arrow.Type +} + +func (s sameTypeIDMatcher) Matches(typ arrow.DataType) bool { return s.accepted == typ.ID() } +func (s sameTypeIDMatcher) Equals(other TypeMatcher) bool { + if s == other { + return true + } + + o, ok := other.(*sameTypeIDMatcher) + if !ok { + return false + } + + return s.accepted == o.accepted +} + +func (s sameTypeIDMatcher) String() string { + return "Type::" + s.accepted.String() +} + +// SameTypeID returns a type matcher which will match +// any DataType that uses the same arrow.Type ID as the one +// passed in here. 
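+// For example, SameTypeID(arrow.TIMESTAMP) matches any timestamp type regardless of unit or
+// time zone; use the unit-specific matchers defined below when that distinction matters.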
+func SameTypeID(id arrow.Type) TypeMatcher { return &sameTypeIDMatcher{id} } + +type timeUnitMatcher struct { + id arrow.Type + unit arrow.TimeUnit +} + +func (s timeUnitMatcher) Matches(typ arrow.DataType) bool { + if typ.ID() != s.id { + return false + } + return s.unit == typ.(arrow.TemporalWithUnit).TimeUnit() +} + +func (s timeUnitMatcher) String() string { + return strings.ToLower(s.id.String()) + "(" + s.unit.String() + ")" +} + +func (s *timeUnitMatcher) Equals(other TypeMatcher) bool { + if s == other { + return true + } + + o, ok := other.(*timeUnitMatcher) + if !ok { + return false + } + return o.id == s.id && o.unit == s.unit +} + +// TimestampTypeUnit returns a TypeMatcher that will match only +// a Timestamp datatype with the specified TimeUnit. +func TimestampTypeUnit(unit arrow.TimeUnit) TypeMatcher { + return &timeUnitMatcher{arrow.TIMESTAMP, unit} +} + +// Time32TypeUnit returns a TypeMatcher that will match only +// a Time32 datatype with the specified TimeUnit. +func Time32TypeUnit(unit arrow.TimeUnit) TypeMatcher { + return &timeUnitMatcher{arrow.TIME32, unit} +} + +// Time64TypeUnit returns a TypeMatcher that will match only +// a Time64 datatype with the specified TimeUnit. +func Time64TypeUnit(unit arrow.TimeUnit) TypeMatcher { + return &timeUnitMatcher{arrow.TIME64, unit} +} + +// DurationTypeUnit returns a TypeMatcher that will match only +// a Duration datatype with the specified TimeUnit. +func DurationTypeUnit(unit arrow.TimeUnit) TypeMatcher { + return &timeUnitMatcher{arrow.DURATION, unit} +} + +type integerMatcher struct{} + +func (integerMatcher) String() string { return "integer" } +func (integerMatcher) Matches(typ arrow.DataType) bool { return arrow.IsInteger(typ.ID()) } +func (integerMatcher) Equals(other TypeMatcher) bool { + _, ok := other.(integerMatcher) + return ok +} + +type binaryLikeMatcher struct{} + +func (binaryLikeMatcher) String() string { return "binary-like" } +func (binaryLikeMatcher) Matches(typ arrow.DataType) bool { return arrow.IsBinaryLike(typ.ID()) } +func (binaryLikeMatcher) Equals(other TypeMatcher) bool { + _, ok := other.(binaryLikeMatcher) + return ok +} + +type largeBinaryLikeMatcher struct{} + +func (largeBinaryLikeMatcher) String() string { return "large-binary-like" } +func (largeBinaryLikeMatcher) Matches(typ arrow.DataType) bool { + return arrow.IsLargeBinaryLike(typ.ID()) +} +func (largeBinaryLikeMatcher) Equals(other TypeMatcher) bool { + _, ok := other.(largeBinaryLikeMatcher) + return ok +} + +type fsbLikeMatcher struct{} + +func (fsbLikeMatcher) String() string { return "fixed-size-binary-like" } +func (fsbLikeMatcher) Matches(typ arrow.DataType) bool { return arrow.IsFixedSizeBinary(typ.ID()) } +func (fsbLikeMatcher) Equals(other TypeMatcher) bool { + _, ok := other.(fsbLikeMatcher) + return ok +} + +// Integer returns a TypeMatcher which will match any integral type like int8 or uint16 +func Integer() TypeMatcher { return integerMatcher{} } + +// BinaryLike returns a TypeMatcher that will match Binary or String +func BinaryLike() TypeMatcher { return binaryLikeMatcher{} } + +// LargeBinaryLike returns a TypeMatcher which will match LargeBinary or LargeString +func LargeBinaryLike() TypeMatcher { return largeBinaryLikeMatcher{} } + +// FixedSizeBinaryLike returns a TypeMatcher that will match FixedSizeBinary +// or Decimal128/256 +func FixedSizeBinaryLike() TypeMatcher { return fsbLikeMatcher{} } + +type primitiveMatcher struct{} + +func (primitiveMatcher) String() string { return "primitive" } +func 
(primitiveMatcher) Matches(typ arrow.DataType) bool { return arrow.IsPrimitive(typ.ID()) } +func (primitiveMatcher) Equals(other TypeMatcher) bool { + _, ok := other.(primitiveMatcher) + return ok +} + +// Primitive returns a TypeMatcher that will match any type that arrow.IsPrimitive +// returns true for. +func Primitive() TypeMatcher { return primitiveMatcher{} } + +// InputKind is an enum representing the type of Input matching +// that will be done. Either accepting any type, an exact specific type +// or using a TypeMatcher. +type InputKind int8 + +const ( + InputAny InputKind = iota + InputExact + InputUseMatcher +) + +// InputType is used for type checking arguments passed to a kernel +// and stored within a KernelSignature. The type-checking rule can +// be supplied either with an exact DataType instance or a custom +// TypeMatcher. +type InputType struct { + Kind InputKind + Type arrow.DataType + Matcher TypeMatcher +} + +func NewExactInput(dt arrow.DataType) InputType { return InputType{Kind: InputExact, Type: dt} } +func NewMatchedInput(match TypeMatcher) InputType { + return InputType{Kind: InputUseMatcher, Matcher: match} +} +func NewIDInput(id arrow.Type) InputType { return NewMatchedInput(SameTypeID(id)) } + +func (it InputType) String() string { + switch it.Kind { + case InputAny: + return "any" + case InputUseMatcher: + return it.Matcher.String() + case InputExact: + return it.Type.String() + } + return "" +} + +func (it *InputType) Equals(other *InputType) bool { + if it == other { + return true + } + + if it.Kind != other.Kind { + return false + } + + switch it.Kind { + case InputAny: + return true + case InputExact: + return arrow.TypeEqual(it.Type, other.Type) + case InputUseMatcher: + return it.Matcher.Equals(other.Matcher) + default: + return false + } +} + +func (it InputType) Hash() uint64 { + var h maphash.Hash + + h.SetSeed(hashSeed) + result := HashCombine(h.Sum64(), uint64(it.Kind)) + switch it.Kind { + case InputExact: + result = HashCombine(result, arrow.HashType(hashSeed, it.Type)) + } + return result +} + +func (it InputType) Matches(dt arrow.DataType) bool { + switch it.Kind { + case InputExact: + return arrow.TypeEqual(it.Type, dt) + case InputUseMatcher: + return it.Matcher.Matches(dt) + case InputAny: + return true + default: + debug.Assert(false, "invalid InputKind") + return true + } +} + +// ResolveKind defines the way that a particular OutputType resolves +// its type. Either it has a fixed type to resolve to or it contains +// a Resolver which will compute the resolved type based on +// the input types. +type ResolveKind int8 + +const ( + ResolveFixed ResolveKind = iota + ResolveComputed +) + +// TypeResolver is simply a function that takes a KernelCtx and a list of input types +// and returns the resolved type or an error. 
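+// For example, a kernel whose output type mirrors its first argument can use a resolver of the
+// form func(_ *KernelCtx, args []arrow.DataType) (arrow.DataType, error) { return args[0], nil }.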
+type TypeResolver = func(*KernelCtx, []arrow.DataType) (arrow.DataType, error) + +type OutputType struct { + Kind ResolveKind + Type arrow.DataType + Resolver TypeResolver +} + +func NewOutputType(dt arrow.DataType) OutputType { + return OutputType{Kind: ResolveFixed, Type: dt} +} + +func NewComputedOutputType(resolver TypeResolver) OutputType { + return OutputType{Kind: ResolveComputed, Resolver: resolver} +} + +func (o OutputType) String() string { + if o.Kind == ResolveFixed { + return o.Type.String() + } + return "computed" +} + +func (o OutputType) Resolve(ctx *KernelCtx, types []arrow.DataType) (arrow.DataType, error) { + switch o.Kind { + case ResolveFixed: + return o.Type, nil + } + + return o.Resolver(ctx, types) +} + +// NullHandling is an enum representing how a particular Kernel +// wants the executor to handle nulls. +type NullHandling int8 + +const ( + // Compute the output validity bitmap by intersection the validity + // bitmaps of the arguments using bitwise-and operations. This means + // that values in the output are valid/non-null only if the corresponding + // values in all input arguments were valid/non-null. Kernels generally + // do not have to touch the bitmap afterwards, but a kernel's exec function + // is permitted to alter the bitmap after the null intersection is computed + // if necessary. + NullIntersection NullHandling = iota + // Kernel expects a pre-allocated buffer to write the result bitmap + // into. + NullComputedPrealloc + // Kernel will allocate and set the validity bitmap of the output + NullComputedNoPrealloc + // kernel output is never null and a validity bitmap doesn't need to + // be allocated + NullNoOutput +) + +// MemAlloc is the preference for preallocating memory of fixed-width +// type outputs during kernel execution. +type MemAlloc int8 + +const ( + // For data types that support pre-allocation (fixed-width), the + // kernel expects to be provided a pre-allocated buffer to write into. + // Non-fixed-width types must always allocate their own buffers. + // The allocation is made for the same length as the execution batch, + // so vector kernels yielding differently sized outputs should not + // use this. + // + // It is valid for the data to not be preallocated but the validity + // bitmap is (or is computed using intersection). + // + // For variable-size output types like Binary or String, or for nested + // types, this option has no effect. + MemPrealloc MemAlloc = iota + // The kernel is responsible for allocating its own data buffer + // for fixed-width output types. + MemNoPrealloc +) + +type KernelState any + +// KernelInitArgs are the arguments required to initialize an Kernel's +// state using the input types and any options. +type KernelInitArgs struct { + Kernel Kernel + Inputs []arrow.DataType + // Options are opaque and specific to the Kernel being initialized, + // may be nil if the kernel doesn't require options. + Options any +} + +// KernelInitFn is any function that receives a KernelCtx and initialization +// arguments and returns the initialized state or an error. +type KernelInitFn = func(*KernelCtx, KernelInitArgs) (KernelState, error) + +// KernelSignature holds the input and output types for a kernel. +// +// Variable argument functions with a minimum of N arguments should pass +// up to N input types to be used to validate for invocation. The first +// N-1 types will be matched against the first N-1 arguments and the last +// type will be matched against the remaining arguments. 
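+// For example, a varargs signature declared with the input types (int8, utf8) matches the
+// argument lists (int8), (int8, utf8), and (int8, utf8, utf8), but not (int8, utf8, int32).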
+type KernelSignature struct { + InputTypes []InputType + OutType OutputType + IsVarArgs bool + + // store the hashcode after it is computed so we don't + // need to recompute it + hashCode uint64 +} + +func (k KernelSignature) String() string { + var b strings.Builder + if k.IsVarArgs { + b.WriteString("varargs[") + } else { + b.WriteByte('(') + } + + for i, t := range k.InputTypes { + if i != 0 { + b.WriteString(", ") + } + b.WriteString(t.String()) + } + if k.IsVarArgs { + b.WriteString("*]") + } else { + b.WriteByte(')') + } + + b.WriteString(" -> ") + b.WriteString(k.OutType.String()) + return b.String() +} + +func (k KernelSignature) Equals(other KernelSignature) bool { + if k.IsVarArgs != other.IsVarArgs { + return false + } + + return slices.EqualFunc(k.InputTypes, other.InputTypes, func(e1, e2 InputType) bool { + return e1.Equals(&e2) + }) +} + +func (k *KernelSignature) Hash() uint64 { + if k.hashCode != 0 { + return k.hashCode + } + + var h maphash.Hash + h.SetSeed(hashSeed) + result := h.Sum64() + for _, typ := range k.InputTypes { + result = HashCombine(result, typ.Hash()) + } + k.hashCode = result + return result +} + +func (k KernelSignature) MatchesInputs(types []arrow.DataType) bool { + switch k.IsVarArgs { + case true: + // check that it has enough to match at least the non-vararg types + if len(types) < (len(k.InputTypes) - 1) { + return false + } + + for i, t := range types { + if !k.InputTypes[Min(i, len(k.InputTypes)-1)].Matches(t) { + return false + } + } + case false: + if len(types) != len(k.InputTypes) { + return false + } + for i, t := range types { + if !k.InputTypes[i].Matches(t) { + return false + } + } + } + return true +} + +// ArrayKernelExec is an alias definition for a kernel's execution function. +// +// This is used for both stateless and stateful kernels. If a kernel +// depends on some execution state, it can be accessed from the KernelCtx +// object, which also contains the context.Context object which can be +// used for shortcircuiting by checking context.Done / context.Err. +// This allows kernels to control handling timeouts or cancellation of +// computation. +type ArrayKernelExec = func(*KernelCtx, *ExecSpan, *ExecResult) error + +type kernel struct { + Init KernelInitFn + Signature *KernelSignature + Data KernelState + Parallelizable bool +} + +func (k kernel) GetInitFn() KernelInitFn { return k.Init } +func (k kernel) GetSig() *KernelSignature { return k.Signature } + +// A ScalarKernel is the kernel implementation for a Scalar Function. +// In addition to the members found in the base Kernel, it contains +// the null handling and memory pre-allocation preferences. +type ScalarKernel struct { + kernel + + ExecFn ArrayKernelExec + CanWriteIntoSlices bool + NullHandling NullHandling + MemAlloc MemAlloc +} + +// NewScalarKernel constructs a new kernel for scalar execution, constructing +// a KernelSignature with the provided input types and output type, and using +// the passed in execution implementation and initialization function. +func NewScalarKernel(in []InputType, out OutputType, exec ArrayKernelExec, init KernelInitFn) ScalarKernel { + return NewScalarKernelWithSig(&KernelSignature{ + InputTypes: in, + OutType: out, + }, exec, init) +} + +// NewScalarKernelWithSig is a convenience when you already have a signature +// to use for constructing a kernel. It's equivalent to passing the components +// of the signature (input and output types) to NewScalarKernel. 
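+// Kernels built this way default to being parallelizable, intersecting validity bitmaps
+// (NullIntersection), preallocating fixed-width output buffers (MemPrealloc), and allowing
+// writes into slices; callers can override these fields on the returned ScalarKernel.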
+func NewScalarKernelWithSig(sig *KernelSignature, exec ArrayKernelExec, init KernelInitFn) ScalarKernel { + return ScalarKernel{ + kernel: kernel{Signature: sig, Init: init, Parallelizable: true}, + ExecFn: exec, + CanWriteIntoSlices: true, + NullHandling: NullIntersection, + MemAlloc: MemPrealloc, + } +} + +func (s *ScalarKernel) Exec(ctx *KernelCtx, sp *ExecSpan, out *ExecResult) error { + return s.ExecFn(ctx, sp, out) +} + +func (s ScalarKernel) GetNullHandling() NullHandling { return s.NullHandling } +func (s ScalarKernel) GetMemAlloc() MemAlloc { return s.MemAlloc } +func (s ScalarKernel) CanFillSlices() bool { return s.CanWriteIntoSlices } diff --git a/go/arrow/compute/internal/exec/kernel_test.go b/go/arrow/compute/internal/exec/kernel_test.go new file mode 100644 index 0000000000000..3584cfd66b1a9 --- /dev/null +++ b/go/arrow/compute/internal/exec/kernel_test.go @@ -0,0 +1,552 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package exec_test + +import ( + "fmt" + "testing" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/compute" + "github.com/apache/arrow/go/v10/arrow/compute/internal/exec" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/scalar" + "github.com/stretchr/testify/assert" +) + +func TestTypeMatcherSameTypeID(t *testing.T) { + matcher := exec.SameTypeID(arrow.DECIMAL128) + assert.True(t, matcher.Matches(&arrow.Decimal128Type{Precision: 12, Scale: 2})) + assert.False(t, matcher.Matches(arrow.PrimitiveTypes.Int8)) + + assert.Equal(t, "Type::DECIMAL128", matcher.String()) + + assert.True(t, matcher.Equals(matcher)) + assert.True(t, matcher.Equals(exec.SameTypeID(arrow.DECIMAL))) + assert.False(t, matcher.Equals(exec.SameTypeID(arrow.TIMESTAMP))) + assert.False(t, matcher.Equals(exec.Time32TypeUnit(arrow.Microsecond))) +} + +func TestTypeMatcherTimestampTypeUnit(t *testing.T) { + matcher := exec.TimestampTypeUnit(arrow.Millisecond) + matcher2 := exec.Time32TypeUnit(arrow.Millisecond) + matcher3 := exec.Time64TypeUnit(arrow.Microsecond) + matcher4 := exec.DurationTypeUnit(arrow.Microsecond) + + assert.True(t, matcher.Matches(arrow.FixedWidthTypes.Timestamp_ms)) + assert.True(t, matcher.Matches(&arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: "utc"})) + assert.False(t, matcher.Matches(arrow.FixedWidthTypes.Timestamp_s)) + assert.False(t, matcher.Matches(arrow.FixedWidthTypes.Time32ms)) + assert.True(t, matcher2.Matches(arrow.FixedWidthTypes.Time32ms)) + + assert.True(t, matcher3.Matches(arrow.FixedWidthTypes.Time64us)) + assert.False(t, matcher3.Matches(arrow.FixedWidthTypes.Time64ns)) + assert.True(t, matcher4.Matches(arrow.FixedWidthTypes.Duration_us)) + assert.False(t, 
matcher4.Matches(arrow.FixedWidthTypes.Duration_ms)) + + // check String() representation + assert.Equal(t, "timestamp(s)", exec.TimestampTypeUnit(arrow.Second).String()) + assert.Equal(t, "timestamp(ms)", exec.TimestampTypeUnit(arrow.Millisecond).String()) + assert.Equal(t, "timestamp(us)", exec.TimestampTypeUnit(arrow.Microsecond).String()) + assert.Equal(t, "timestamp(ns)", exec.TimestampTypeUnit(arrow.Nanosecond).String()) + + // equals implementation + assert.True(t, matcher.Equals(matcher)) + assert.True(t, matcher.Equals(exec.TimestampTypeUnit(arrow.Millisecond))) + assert.False(t, matcher.Equals(exec.TimestampTypeUnit(arrow.Microsecond))) + assert.False(t, matcher.Equals(exec.Time32TypeUnit(arrow.Millisecond))) + assert.False(t, matcher3.Equals(matcher2)) + assert.False(t, matcher4.Equals(matcher3)) + assert.True(t, matcher4.Equals(exec.DurationTypeUnit(arrow.Microsecond))) + assert.False(t, matcher.Equals(exec.SameTypeID(arrow.TIMESTAMP))) +} + +func TestIntegerMatcher(t *testing.T) { + match := exec.Integer() + + assert.Equal(t, "integer", match.String()) + assert.True(t, match.Matches(arrow.PrimitiveTypes.Int8)) + assert.True(t, match.Matches(arrow.PrimitiveTypes.Uint64)) + assert.True(t, match.Equals(exec.Integer())) + assert.False(t, match.Equals(exec.BinaryLike())) +} + +func TestBinaryLikeMatcher(t *testing.T) { + match := exec.BinaryLike() + + assert.Equal(t, "binary-like", match.String()) + assert.True(t, match.Matches(arrow.BinaryTypes.String)) + assert.True(t, match.Matches(arrow.BinaryTypes.Binary)) + assert.False(t, match.Matches(arrow.BinaryTypes.LargeString)) + assert.False(t, match.Matches(arrow.BinaryTypes.LargeBinary)) + assert.False(t, match.Equals(exec.LargeBinaryLike())) + assert.True(t, match.Equals(exec.BinaryLike())) +} + +func TestLargeBinaryLikeMatcher(t *testing.T) { + match := exec.LargeBinaryLike() + + assert.Equal(t, "large-binary-like", match.String()) + assert.False(t, match.Matches(arrow.BinaryTypes.String)) + assert.False(t, match.Matches(arrow.BinaryTypes.Binary)) + assert.True(t, match.Matches(arrow.BinaryTypes.LargeString)) + assert.True(t, match.Matches(arrow.BinaryTypes.LargeBinary)) + assert.True(t, match.Equals(exec.LargeBinaryLike())) + assert.False(t, match.Equals(exec.BinaryLike())) +} + +func TestFixedSizeBinaryMatcher(t *testing.T) { + match := exec.FixedSizeBinaryLike() + + assert.Equal(t, "fixed-size-binary-like", match.String()) + assert.False(t, match.Matches(arrow.BinaryTypes.String)) + assert.True(t, match.Matches(&arrow.Decimal128Type{Precision: 12, Scale: 5})) + assert.True(t, match.Matches(&arrow.Decimal256Type{Precision: 12, Scale: 10})) + assert.True(t, match.Matches(&arrow.FixedSizeBinaryType{})) + assert.False(t, match.Equals(exec.LargeBinaryLike())) + assert.True(t, match.Equals(exec.FixedSizeBinaryLike())) +} + +func TestPrimitiveMatcher(t *testing.T) { + match := exec.Primitive() + + assert.Equal(t, "primitive", match.String()) + assert.True(t, match.Equals(exec.Primitive())) + + types := []arrow.DataType{ + arrow.FixedWidthTypes.Boolean, + arrow.PrimitiveTypes.Uint8, + arrow.PrimitiveTypes.Int8, + arrow.PrimitiveTypes.Uint16, + arrow.PrimitiveTypes.Int16, + arrow.PrimitiveTypes.Uint32, + arrow.PrimitiveTypes.Int32, + arrow.PrimitiveTypes.Uint64, + arrow.PrimitiveTypes.Int64, + arrow.FixedWidthTypes.Float16, + arrow.PrimitiveTypes.Float32, + arrow.PrimitiveTypes.Float64, + arrow.FixedWidthTypes.Date32, + arrow.FixedWidthTypes.Date64, + arrow.FixedWidthTypes.Time32ms, + arrow.FixedWidthTypes.Time64ns, + 
arrow.FixedWidthTypes.Timestamp_ms, + arrow.FixedWidthTypes.Duration_ms, + arrow.FixedWidthTypes.MonthInterval, + arrow.FixedWidthTypes.DayTimeInterval, + arrow.FixedWidthTypes.MonthDayNanoInterval, + } + + for _, typ := range types { + assert.True(t, match.Matches(typ)) + } + + assert.False(t, match.Matches(arrow.Null)) +} + +func TestInputTypeAnyType(t *testing.T) { + var ty exec.InputType + assert.Equal(t, exec.InputAny, ty.Kind) +} + +func TestInputType(t *testing.T) { + ty1 := exec.NewExactInput(arrow.PrimitiveTypes.Int8) + assert.Equal(t, exec.InputExact, ty1.Kind) + assert.True(t, arrow.TypeEqual(arrow.PrimitiveTypes.Int8, ty1.Type)) + assert.Equal(t, "int8", ty1.String()) + + ty2 := exec.NewIDInput(arrow.DECIMAL) + assert.Equal(t, exec.InputUseMatcher, ty2.Kind) + assert.Equal(t, "Type::DECIMAL128", ty2.String()) + assert.True(t, ty2.Matcher.Matches(&arrow.Decimal128Type{Precision: 12, Scale: 2})) + assert.False(t, ty2.Matcher.Matches(arrow.PrimitiveTypes.Int16)) + + ty3 := exec.NewMatchedInput(exec.TimestampTypeUnit(arrow.Microsecond)) + assert.Equal(t, "timestamp(us)", ty3.String()) + + var ty4 exec.InputType + assert.Equal(t, "any", ty4.String()) + // InputAny matches anything + assert.True(t, ty4.Matches((arrow.DataType)(nil))) +} + +func TestInputTypeEquals(t *testing.T) { + t1 := exec.NewExactInput(arrow.PrimitiveTypes.Int8) + t2 := exec.NewExactInput(arrow.PrimitiveTypes.Int8) + t3 := exec.NewExactInput(arrow.PrimitiveTypes.Int32) + + t5 := exec.NewIDInput(arrow.DECIMAL) + t6 := exec.NewIDInput(arrow.DECIMAL) + + assert.True(t, t1.Equals(&t2)) + assert.False(t, t1.Equals(&t3)) + assert.False(t, t1.Equals(&t5)) + assert.True(t, t5.Equals(&t5)) + assert.True(t, t5.Equals(&t6)) + + var ty exec.InputType + assert.True(t, ty.Equals(&exec.InputType{Kind: exec.InputAny})) + + // for now, an ID matcher for arrow.INT32 and a ExactInput for + // arrow.PrimitiveTypes.Int32 are treated as being different. 
+ // this could be made equivalent later if desireable + + // check that field metadata is excluded from equality checks + t7 := exec.NewExactInput(arrow.ListOfField( + arrow.Field{Name: "item", Type: arrow.BinaryTypes.String, + Nullable: true, Metadata: arrow.NewMetadata([]string{"foo"}, []string{"bar"})})) + t8 := exec.NewExactInput(arrow.ListOf(arrow.BinaryTypes.String)) + assert.True(t, t7.Equals(&t8)) +} + +func TestInputTypeHash(t *testing.T) { + var ( + t0 exec.InputType + t1 = exec.NewExactInput(arrow.PrimitiveTypes.Int8) + t2 = exec.NewIDInput(arrow.DECIMAL) + ) + + // these checks try to determine first of all whether hash + // always returns the same value, and whether the elements + // of the type are all incorporated into the hash + assert.Equal(t, t0.Hash(), t0.Hash()) + assert.Equal(t, t1.Hash(), t1.Hash()) + assert.Equal(t, t2.Hash(), t2.Hash()) + assert.NotEqual(t, t0.Hash(), t1.Hash()) + assert.NotEqual(t, t0.Hash(), t2.Hash()) + assert.NotEqual(t, t1.Hash(), t2.Hash()) +} + +func TestInputTypeMatches(t *testing.T) { + in1 := exec.NewExactInput(arrow.PrimitiveTypes.Int8) + + assert.True(t, in1.Matches(arrow.PrimitiveTypes.Int8)) + assert.False(t, in1.Matches(arrow.PrimitiveTypes.Int16)) + + in2 := exec.NewIDInput(arrow.DECIMAL) + assert.True(t, in2.Matches(&arrow.Decimal128Type{Precision: 12, Scale: 2})) + + ty2 := &arrow.Decimal128Type{Precision: 12, Scale: 2} + ty3 := arrow.PrimitiveTypes.Float64 + + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(t, 0) + + arr2 := array.MakeArrayOfNull(mem, ty2, 1) + arr3 := array.MakeArrayOfNull(mem, ty3, 1) + defer arr2.Release() + defer arr3.Release() + + scalar2, err := scalar.GetScalar(arr2, 0) + assert.NoError(t, err) + + datumArr := compute.NewDatum(arr2) + defer datumArr.Release() + datumScalar := compute.NewDatum(scalar2) + defer datumScalar.Release() + + assert.False(t, in2.Matches(ty3)) + assert.False(t, in2.Matches(arr3.DataType())) +} + +func TestOutputType(t *testing.T) { + ty1 := exec.NewOutputType(arrow.PrimitiveTypes.Int8) + assert.Equal(t, exec.ResolveFixed, ty1.Kind) + assert.True(t, arrow.TypeEqual(arrow.PrimitiveTypes.Int8, ty1.Type)) + + dummyResolver := func(_ *exec.KernelCtx, args []arrow.DataType) (arrow.DataType, error) { + return arrow.PrimitiveTypes.Int32, nil + } + + ty2 := exec.NewComputedOutputType(dummyResolver) + assert.Equal(t, exec.ResolveComputed, ty2.Kind) + + outType2, err := ty2.Resolve(nil, nil) + assert.NoError(t, err) + assert.Same(t, arrow.PrimitiveTypes.Int32, outType2) + + ty3 := ty1 + assert.Equal(t, exec.ResolveFixed, ty3.Kind) + assert.True(t, arrow.TypeEqual(ty1.Type, ty3.Type)) + + ty4 := ty2 + assert.Equal(t, exec.ResolveComputed, ty4.Kind) + outType4, err := ty4.Resolve(nil, nil) + assert.NoError(t, err) + assert.Same(t, arrow.PrimitiveTypes.Int32, outType4) + + assert.Equal(t, "int8", ty3.String()) + assert.Equal(t, "computed", ty4.String()) +} + +func TestOutputTypeResolve(t *testing.T) { + ty1 := exec.NewOutputType(arrow.PrimitiveTypes.Int32) + + result, err := ty1.Resolve(nil, nil) + assert.NoError(t, err) + assert.Same(t, arrow.PrimitiveTypes.Int32, result) + + result, err = ty1.Resolve(nil, []arrow.DataType{arrow.PrimitiveTypes.Int8}) + assert.NoError(t, err) + assert.Same(t, arrow.PrimitiveTypes.Int32, result) + + result, err = ty1.Resolve(nil, []arrow.DataType{arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int8}) + assert.NoError(t, err) + assert.Same(t, arrow.PrimitiveTypes.Int32, result) + + resolver := func(_ *exec.KernelCtx, args 
[]arrow.DataType) (arrow.DataType, error) { + return args[0], nil + } + ty2 := exec.NewComputedOutputType(resolver) + + result, err = ty2.Resolve(nil, []arrow.DataType{arrow.BinaryTypes.String}) + assert.NoError(t, err) + assert.Same(t, arrow.BinaryTypes.String, result) + + // type resolver that returns an error + ty3 := exec.NewComputedOutputType(func(_ *exec.KernelCtx, dt []arrow.DataType) (arrow.DataType, error) { + // checking the value types versus the function arity should be validated + // elsewhere. this is just for illustration purposes + if len(dt) == 0 { + return nil, fmt.Errorf("%w: need at least one argument", arrow.ErrInvalid) + } + return dt[0], nil + }) + + _, err = ty3.Resolve(nil, []arrow.DataType{}) + assert.ErrorIs(t, err, arrow.ErrInvalid) + + // resolver returns a fixed value + ty4 := exec.NewComputedOutputType(func(*exec.KernelCtx, []arrow.DataType) (arrow.DataType, error) { + return arrow.PrimitiveTypes.Int32, nil + }) + result, err = ty4.Resolve(nil, []arrow.DataType{arrow.PrimitiveTypes.Int8}) + assert.NoError(t, err) + assert.Same(t, arrow.PrimitiveTypes.Int32, result) + result, err = ty4.Resolve(nil, []arrow.DataType{}) + assert.NoError(t, err) + assert.Same(t, arrow.PrimitiveTypes.Int32, result) +} + +func TestKernelSignatureEquals(t *testing.T) { + sig1 := exec.KernelSignature{ + InputTypes: []exec.InputType{}, + OutType: exec.NewOutputType(arrow.BinaryTypes.String)} + sig1Copy := exec.KernelSignature{ + InputTypes: []exec.InputType{}, + OutType: exec.NewOutputType(arrow.BinaryTypes.String)} + sig2 := exec.KernelSignature{ + InputTypes: []exec.InputType{ + exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, + OutType: exec.NewOutputType(arrow.BinaryTypes.String), + } + + // output type doesn't matter (for now) + sig3 := exec.KernelSignature{ + InputTypes: []exec.InputType{ + exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, + OutType: exec.NewOutputType(arrow.PrimitiveTypes.Int32), + } + + sig4 := exec.KernelSignature{ + InputTypes: []exec.InputType{ + exec.NewExactInput(arrow.PrimitiveTypes.Int8), + exec.NewExactInput(arrow.PrimitiveTypes.Int16), + }, + OutType: exec.NewOutputType(arrow.BinaryTypes.String), + } + sig4Copy := exec.KernelSignature{ + InputTypes: []exec.InputType{ + exec.NewExactInput(arrow.PrimitiveTypes.Int8), + exec.NewExactInput(arrow.PrimitiveTypes.Int16), + }, + OutType: exec.NewOutputType(arrow.BinaryTypes.String), + } + sig5 := exec.KernelSignature{ + InputTypes: []exec.InputType{ + exec.NewExactInput(arrow.PrimitiveTypes.Int8), + exec.NewExactInput(arrow.PrimitiveTypes.Int16), + exec.NewExactInput(arrow.PrimitiveTypes.Int32), + }, + OutType: exec.NewOutputType(arrow.BinaryTypes.String), + } + + assert.True(t, sig1.Equals(sig1)) + assert.True(t, sig2.Equals(sig3)) + assert.False(t, sig3.Equals(sig4)) + + // different sig objects but same sig + assert.True(t, sig1.Equals(sig1Copy)) + assert.True(t, sig4.Equals(sig4Copy)) + + // match first 2 args, but not third + assert.False(t, sig4.Equals(sig5)) +} + +func TestKernelSignatureVarArgsEqual(t *testing.T) { + sig1 := exec.KernelSignature{ + InputTypes: []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, + OutType: exec.NewOutputType(arrow.BinaryTypes.String), + IsVarArgs: true, + } + sig2 := exec.KernelSignature{ + InputTypes: []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, + OutType: exec.NewOutputType(arrow.BinaryTypes.String), + IsVarArgs: true, + } + sig3 := exec.KernelSignature{ + InputTypes: []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, + 
OutType: exec.NewOutputType(arrow.BinaryTypes.String), + } + + assert.True(t, sig1.Equals(sig2)) + assert.False(t, sig2.Equals(sig3)) +} + +func TestKernelSignatureHash(t *testing.T) { + sig1 := exec.KernelSignature{ + InputTypes: []exec.InputType{}, + OutType: exec.NewOutputType(arrow.BinaryTypes.String), + } + sig2 := exec.KernelSignature{ + InputTypes: []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, + OutType: exec.NewOutputType(arrow.BinaryTypes.String), + } + sig3 := exec.KernelSignature{ + InputTypes: []exec.InputType{ + exec.NewExactInput(arrow.PrimitiveTypes.Int8), + exec.NewExactInput(arrow.PrimitiveTypes.Int32)}, + OutType: exec.NewOutputType(arrow.BinaryTypes.String), + } + + assert.Equal(t, sig1.Hash(), sig1.Hash()) + assert.Equal(t, sig2.Hash(), sig2.Hash()) + assert.NotEqual(t, sig1.Hash(), sig2.Hash()) + assert.NotEqual(t, sig2.Hash(), sig3.Hash()) +} + +func TestKernelSignatureMatchesInputs(t *testing.T) { + // () -> boolean + sig1 := exec.KernelSignature{ + OutType: exec.NewOutputType(arrow.FixedWidthTypes.Boolean)} + + assert.True(t, sig1.MatchesInputs([]arrow.DataType{})) + assert.False(t, sig1.MatchesInputs([]arrow.DataType{arrow.PrimitiveTypes.Int8})) + + // (int8, decimal) -> boolean + sig2 := exec.KernelSignature{ + InputTypes: []exec.InputType{ + exec.NewExactInput(arrow.PrimitiveTypes.Int8), + exec.NewIDInput(arrow.DECIMAL)}, + OutType: exec.NewOutputType(arrow.FixedWidthTypes.Boolean), + } + assert.False(t, sig2.MatchesInputs([]arrow.DataType{})) + assert.False(t, sig2.MatchesInputs([]arrow.DataType{arrow.PrimitiveTypes.Int8})) + assert.True(t, sig2.MatchesInputs([]arrow.DataType{ + arrow.PrimitiveTypes.Int8, + &arrow.Decimal128Type{Precision: 12, Scale: 2}})) + + // (int8, int32) -> boolean + sig3 := exec.KernelSignature{ + InputTypes: []exec.InputType{ + exec.NewExactInput(arrow.PrimitiveTypes.Int8), + exec.NewExactInput(arrow.PrimitiveTypes.Int32), + }, + OutType: exec.NewOutputType(arrow.FixedWidthTypes.Boolean), + } + assert.False(t, sig3.MatchesInputs(nil)) + assert.True(t, sig3.MatchesInputs([]arrow.DataType{arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int32})) + assert.False(t, sig3.MatchesInputs([]arrow.DataType{arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int16})) +} + +func TestKernelSignatureVarArgsMatchesInputs(t *testing.T) { + { + sig := exec.KernelSignature{ + InputTypes: []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, + OutType: exec.NewOutputType(arrow.BinaryTypes.String), + IsVarArgs: true, + } + + args := []arrow.DataType{arrow.PrimitiveTypes.Int8} + assert.True(t, sig.MatchesInputs(args)) + args = append(args, arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int8) + assert.True(t, sig.MatchesInputs(args)) + args = append(args, arrow.PrimitiveTypes.Int32) + assert.False(t, sig.MatchesInputs(args)) + } + { + sig := exec.KernelSignature{ + InputTypes: []exec.InputType{ + exec.NewExactInput(arrow.PrimitiveTypes.Int8), + exec.NewExactInput(arrow.BinaryTypes.String), + }, + OutType: exec.NewOutputType(arrow.BinaryTypes.String), + IsVarArgs: true, + } + + args := []arrow.DataType{arrow.PrimitiveTypes.Int8} + assert.True(t, sig.MatchesInputs(args)) + args = append(args, arrow.BinaryTypes.String, arrow.BinaryTypes.String) + assert.True(t, sig.MatchesInputs(args)) + args = append(args, arrow.PrimitiveTypes.Int32) + assert.False(t, sig.MatchesInputs(args)) + } +} + +func TestKernelSignatureToString(t *testing.T) { + inTypes := []exec.InputType{ + exec.NewExactInput(arrow.PrimitiveTypes.Int8), + 
exec.NewIDInput(arrow.DECIMAL), + exec.NewExactInput(arrow.BinaryTypes.String), + } + + sig := exec.KernelSignature{ + InputTypes: inTypes, OutType: exec.NewOutputType(arrow.BinaryTypes.String), + } + assert.Equal(t, "(int8, Type::DECIMAL128, utf8) -> utf8", sig.String()) + + outType := exec.NewComputedOutputType(func(*exec.KernelCtx, []arrow.DataType) (arrow.DataType, error) { + return nil, arrow.ErrInvalid + }) + sig2 := exec.KernelSignature{ + InputTypes: []exec.InputType{ + exec.NewExactInput(arrow.PrimitiveTypes.Int8), + exec.NewIDInput(arrow.DECIMAL)}, + OutType: outType, + } + assert.Equal(t, "(int8, Type::DECIMAL128) -> computed", sig2.String()) +} + +func TestKernelSignatureVarArgsToString(t *testing.T) { + sig1 := exec.KernelSignature{ + InputTypes: []exec.InputType{ + exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, + OutType: exec.NewOutputType(arrow.BinaryTypes.String), + IsVarArgs: true, + } + assert.Equal(t, "varargs[int8*] -> utf8", sig1.String()) + + sig2 := exec.KernelSignature{ + InputTypes: []exec.InputType{ + exec.NewExactInput(arrow.BinaryTypes.String), + exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, + OutType: exec.NewOutputType(arrow.BinaryTypes.String), + IsVarArgs: true, + } + assert.Equal(t, "varargs[utf8, int8*] -> utf8", sig2.String()) +} diff --git a/go/arrow/compute/internal/exec/span.go b/go/arrow/compute/internal/exec/span.go new file mode 100644 index 0000000000000..2f4e7330d129d --- /dev/null +++ b/go/arrow/compute/internal/exec/span.go @@ -0,0 +1,542 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package exec + +import ( + "reflect" + "unsafe" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/scalar" +) + +// BufferSpan is a lightweight Buffer holder for ArraySpans that does not +// take ownership of the underlying memory.Buffer at all or could be +// used to reference raw byte slices instead. +type BufferSpan struct { + // Buf should be the byte slice representing this buffer; if this is + // nil then this bufferspan should be considered empty. + Buf []byte + // Owner should point to an underlying parent memory.Buffer if this + // memory is owned by a different, existing, buffer. Retain is not + // called on this buffer, so it must not be released as long as + // this BufferSpan refers to it. + Owner *memory.Buffer + // SelfAlloc tracks whether or not this bufferspan is the only owner + // of the Owning memory.Buffer. This happens when preallocating + // memory or if a kernel allocates its own buffer for a result. 
+ // In these cases, we have to know so we can properly maintain the + // refcount if this is later turned into an ArrayData object. + SelfAlloc bool +} + +// SetBuffer sets the given buffer into this BufferSpan and marks +// SelfAlloc as false. This should be called when setting a buffer +// that is externally owned/created. +func (b *BufferSpan) SetBuffer(buf *memory.Buffer) { + b.Buf = buf.Bytes() + b.Owner = buf + b.SelfAlloc = false +} + +// WrapBuffer wraps this bufferspan around a buffer and marks +// SelfAlloc as true. This should be called when setting a buffer +// that was allocated as part of an execution rather than just +// re-using an existing buffer from an input array. +func (b *BufferSpan) WrapBuffer(buf *memory.Buffer) { + b.Buf = buf.Bytes() + b.Owner = buf + b.SelfAlloc = true +} + +// ArraySpan is a light-weight, non-owning version of arrow.ArrayData +// for more efficient handling with computation and engines. We use +// explicit go Arrays to define the buffers and some scratch space +// for easily populating and shifting around pointers to memory without +// having to worry about and deal with retain/release during calculations. +type ArraySpan struct { + Type arrow.DataType + Len int64 + Nulls int64 + Offset int64 + Buffers [3]BufferSpan + + // Scratch is a holding spot for things such as + // offsets or union type codes when converting from scalars + Scratch [2]uint64 + + Children []ArraySpan +} + +// UpdateNullCount will count the bits in the null bitmap and update the +// number of nulls if the current null count is unknown, otherwise it just +// returns the value of a.Nulls +func (a *ArraySpan) UpdateNullCount() int64 { + if a.Nulls != array.UnknownNullCount { + return a.Nulls + } + + a.Nulls = a.Len - int64(bitutil.CountSetBits(a.Buffers[0].Buf, int(a.Offset), int(a.Len))) + return a.Nulls +} + +// Dictionary returns a pointer to the array span for the dictionary which +// we will always place as the first (and only) child if it exists. +func (a *ArraySpan) Dictionary() *ArraySpan { return &a.Children[0] } + +// NumBuffers returns the number of expected buffers for this type +func (a *ArraySpan) NumBuffers() int { return getNumBuffers(a.Type) } + +// MakeData generates an arrow.ArrayData object for this ArraySpan, +// properly updating the buffer ref count if necessary. +func (a *ArraySpan) MakeData() arrow.ArrayData { + bufs := make([]*memory.Buffer, a.NumBuffers()) + for i := range bufs { + b := a.GetBuffer(i) + bufs[i] = b + if b != nil && a.Buffers[i].SelfAlloc { + // if this buffer is just a pointer to another existing buffer + // then we never bumped the refcount for that buffer. + // As a result, we won't call release here so that the call + // to array.NewData properly updates the ref counts of the buffers. + // If instead this buffer was allocated during calculation + // (such as during prealloc or by a kernel itself) + // then we need to release after we create the ArrayData so that it + // maintains the correct refcount of 1, giving the resulting + // ArrayData object ownership of this buffer. 
+ defer b.Release() + } + } + + var ( + nulls = int(a.Nulls) + length = int(a.Len) + off = int(a.Offset) + dt = a.Type + children []arrow.ArrayData + ) + + if a.Type.ID() == arrow.NULL { + nulls = int(length) + } else if len(a.Buffers[0].Buf) == 0 { + nulls = 0 + } + + // we use a.Type for the NewData call at the end, so we can + // handle extension types by using dt to point to the storage type + // and let the proper extension type get set into the ArrayData + // object we return. + if dt.ID() == arrow.EXTENSION { + dt = dt.(arrow.ExtensionType).StorageType() + } + + if dt.ID() == arrow.DICTIONARY { + result := array.NewData(a.Type, length, bufs, nil, nulls, off) + dict := a.Dictionary().MakeData() + defer dict.Release() + result.SetDictionary(dict) + return result + } + + if len(a.Children) > 0 { + children = make([]arrow.ArrayData, len(a.Children)) + for i, c := range a.Children { + d := c.MakeData() + defer d.Release() + children[i] = d + } + } + return array.NewData(a.Type, length, bufs, children, nulls, off) +} + +// MakeArray is a convenience function for calling array.MakeFromData(a.MakeData()) +func (a *ArraySpan) MakeArray() arrow.Array { + d := a.MakeData() + defer d.Release() + return array.MakeFromData(d) +} + +// SetSlice updates the offset and length of this ArraySpan to refer to +// a specific slice of the underlying buffers. +func (a *ArraySpan) SetSlice(off, length int64) { + a.Offset, a.Len = off, length + if a.Type.ID() != arrow.NULL { + a.Nulls = array.UnknownNullCount + } else { + a.Nulls = a.Len + } +} + +// GetBuffer returns the buffer for the requested index. If this buffer +// is owned by another array/arrayspan the Owning buffer is returned, +// otherwise if this slice has no owning buffer, we call NewBufferBytes +// to wrap it as a memory.Buffer. Can also return nil if there is no +// buffer in this index. +func (a *ArraySpan) GetBuffer(idx int) *memory.Buffer { + buf := a.Buffers[idx] + switch { + case buf.Owner != nil: + return buf.Owner + case buf.Buf != nil: + return memory.NewBufferBytes(buf.Buf) + } + return nil +} + +// convenience function to resize the children slice if necessary, +// or just shrink the slice without re-allocating if there's enough +// capacity already. +func (a *ArraySpan) resizeChildren(i int) { + if cap(a.Children) >= i { + a.Children = a.Children[:i] + } else { + a.Children = make([]ArraySpan, i) + } +} + +// convenience function for populating the offsets buffer from a scalar +// value's size. +func setOffsetsForScalar[T int32 | int64](span *ArraySpan, buf []T, valueSize int64, bufidx int) { + buf[0] = 0 + buf[1] = T(valueSize) + + b := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + s := (*reflect.SliceHeader)(unsafe.Pointer(&span.Buffers[bufidx].Buf)) + s.Data = b.Data + s.Len = 2 * int(unsafe.Sizeof(T(0))) + s.Cap = s.Len + + span.Buffers[bufidx].Owner = nil + span.Buffers[bufidx].SelfAlloc = false +} + +// FillFromScalar populates this ArraySpan as if it were a 1 length array +// with the single value equal to the passed in Scalar. 
+func (a *ArraySpan) FillFromScalar(val scalar.Scalar) { + var ( + trueBit byte = 0x01 + falseBit byte = 0x00 + ) + + a.Type = val.DataType() + a.Len = 1 + typeID := a.Type.ID() + if val.IsValid() { + a.Nulls = 0 + } else { + a.Nulls = 1 + } + + if !arrow.IsUnion(typeID) && typeID != arrow.NULL { + if val.IsValid() { + a.Buffers[0].Buf = []byte{trueBit} + } else { + a.Buffers[0].Buf = []byte{falseBit} + } + a.Buffers[0].Owner = nil + a.Buffers[0].SelfAlloc = false + } + + switch { + case typeID == arrow.BOOL: + if val.(*scalar.Boolean).Value { + a.Buffers[1].Buf = []byte{trueBit} + } else { + a.Buffers[1].Buf = []byte{falseBit} + } + a.Buffers[1].Owner = nil + a.Buffers[1].SelfAlloc = false + case arrow.IsPrimitive(typeID) || arrow.IsDecimal(typeID): + sc := val.(scalar.PrimitiveScalar) + a.Buffers[1].Buf = sc.Data() + a.Buffers[1].Owner = nil + a.Buffers[1].SelfAlloc = false + case typeID == arrow.DICTIONARY: + sc := val.(scalar.PrimitiveScalar) + a.Buffers[1].Buf = sc.Data() + a.Buffers[1].Owner = nil + a.Buffers[1].SelfAlloc = false + a.resizeChildren(1) + a.Children[0].SetMembers(val.(*scalar.Dictionary).Value.Dict.Data()) + case arrow.IsBaseBinary(typeID): + sc := val.(scalar.BinaryScalar) + a.Buffers[1].Buf = arrow.Uint64Traits.CastToBytes(a.Scratch[:]) + a.Buffers[1].Owner = nil + a.Buffers[1].SelfAlloc = false + + var dataBuffer []byte + if sc.IsValid() { + dataBuffer = sc.Data() + a.Buffers[2].Owner = sc.Buffer() + a.Buffers[2].SelfAlloc = false + } + if arrow.IsBinaryLike(typeID) { + setOffsetsForScalar(a, + unsafe.Slice((*int32)(unsafe.Pointer(&a.Scratch[0])), 2), + int64(len(dataBuffer)), 1) + } else { + // large_binary_like + setOffsetsForScalar(a, + unsafe.Slice((*int64)(unsafe.Pointer(&a.Scratch[0])), 2), + int64(len(dataBuffer)), 1) + } + a.Buffers[2].Buf = dataBuffer + case typeID == arrow.FIXED_SIZE_BINARY: + sc := val.(scalar.BinaryScalar) + a.Buffers[1].Buf = sc.Data() + a.Buffers[1].Owner = sc.Buffer() + a.Buffers[1].SelfAlloc = false + case arrow.IsListLike(typeID): + sc := val.(scalar.ListScalar) + valueLen := 0 + a.resizeChildren(1) + + if sc.GetList() != nil { + a.Children[0].SetMembers(sc.GetList().Data()) + valueLen = sc.GetList().Len() + } else { + // even when the value is null, we must populate + // child data to yield a valid array. 
ugh + FillZeroLength(sc.DataType().(arrow.NestedType).Fields()[0].Type, &a.Children[0]) + } + + switch typeID { + case arrow.LIST, arrow.MAP: + setOffsetsForScalar(a, + unsafe.Slice((*int32)(unsafe.Pointer(&a.Scratch[0])), 2), + int64(valueLen), 1) + case arrow.LARGE_LIST: + setOffsetsForScalar(a, + unsafe.Slice((*int64)(unsafe.Pointer(&a.Scratch[0])), 2), + int64(valueLen), 1) + default: + // fixed size list has no second buffer + a.Buffers[1].Buf, a.Buffers[1].Owner = nil, nil + a.Buffers[1].SelfAlloc = false + } + case typeID == arrow.STRUCT: + sc := val.(*scalar.Struct) + a.Buffers[1].Buf = nil + a.Buffers[1].Owner = nil + a.Buffers[1].SelfAlloc = false + a.resizeChildren(len(sc.Value)) + for i, v := range sc.Value { + a.Children[i].FillFromScalar(v) + } + case arrow.IsUnion(typeID): + // first buffer is kept null since unions have no validity vector + a.Buffers[0].Buf, a.Buffers[0].Owner = nil, nil + a.Buffers[0].SelfAlloc = false + + a.Buffers[1].Buf = arrow.Uint64Traits.CastToBytes(a.Scratch[:])[:1] + a.Buffers[1].Owner = nil + a.Buffers[1].SelfAlloc = false + codes := unsafe.Slice((*arrow.UnionTypeCode)(unsafe.Pointer(&a.Buffers[1].Buf[0])), 1) + + a.resizeChildren(len(a.Type.(arrow.UnionType).Fields())) + switch sc := val.(type) { + case *scalar.DenseUnion: + codes[0] = sc.TypeCode + // has offset, start 4 bytes in so it's aligned to the 32-bit boundaries + off := unsafe.Slice((*int32)(unsafe.Add(unsafe.Pointer(&a.Scratch[0]), arrow.Int32SizeBytes)), 2) + setOffsetsForScalar(a, off, 1, 2) + // we can't "see" the other arrays in the union, but we put the "active" + // union array in the right place and fill zero-length arrays for + // the others. + childIDS := a.Type.(arrow.UnionType).ChildIDs() + for i, f := range a.Type.(arrow.UnionType).Fields() { + if i == childIDS[sc.TypeCode] { + a.Children[i].FillFromScalar(sc.Value) + } else { + FillZeroLength(f.Type, &a.Children[i]) + } + } + case *scalar.SparseUnion: + codes[0] = sc.TypeCode + // sparse union scalars have a full complement of child values + // even though only one of them is relevant, so we just fill them + // in here + for i, v := range sc.Value { + a.Children[i].FillFromScalar(v) + } + } + case typeID == arrow.EXTENSION: + // pass through storage + sc := val.(*scalar.Extension) + a.FillFromScalar(sc.Value) + // restore the extension type + a.Type = val.DataType() + case typeID == arrow.NULL: + for i := range a.Buffers { + a.Buffers[i].Buf = nil + a.Buffers[i].Owner = nil + a.Buffers[i].SelfAlloc = false + } + } +} + +// SetMembers populates this ArraySpan from the given ArrayData object. 
+// As this is a non-owning reference, the ArrayData object must not +// be fully released while this ArraySpan is in use, otherwise any buffers +// referenced will be released too +func (a *ArraySpan) SetMembers(data arrow.ArrayData) { + a.Type = data.DataType() + a.Len = int64(data.Len()) + if a.Type.ID() == arrow.NULL { + a.Nulls = a.Len + } else { + a.Nulls = int64(data.NullN()) + } + a.Offset = int64(data.Offset()) + + for i, b := range data.Buffers() { + if b != nil { + a.Buffers[i].SetBuffer(b) + } else { + a.Buffers[i].Buf = nil + a.Buffers[i].Owner = nil + a.Buffers[i].SelfAlloc = false + } + } + + typeID := a.Type.ID() + if a.Buffers[0].Buf == nil { + switch typeID { + case arrow.NULL, arrow.SPARSE_UNION, arrow.DENSE_UNION: + default: + // should already be zero, but we make sure + a.Nulls = 0 + } + } + + for i := len(data.Buffers()); i < 3; i++ { + a.Buffers[i].Buf = nil + a.Buffers[i].Owner = nil + a.Buffers[i].SelfAlloc = false + } + + if typeID == arrow.DICTIONARY { + if cap(a.Children) >= 1 { + a.Children = a.Children[:1] + } else { + a.Children = make([]ArraySpan, 1) + } + a.Children[0].SetMembers(data.Dictionary()) + } else { + if cap(a.Children) >= len(data.Children()) { + a.Children = a.Children[:len(data.Children())] + } else { + a.Children = make([]ArraySpan, len(data.Children())) + } + for i, c := range data.Children() { + a.Children[i].SetMembers(c) + } + } +} + +// ExecValue represents a single input to an execution which could +// be either an Array (ArraySpan) or a Scalar value +type ExecValue struct { + Array ArraySpan + Scalar scalar.Scalar +} + +func (e *ExecValue) IsArray() bool { return e.Scalar == nil } +func (e *ExecValue) IsScalar() bool { return !e.IsArray() } + +func (e *ExecValue) Type() arrow.DataType { + if e.IsArray() { + return e.Array.Type + } + return e.Scalar.DataType() +} + +// ExecResult is the result of a kernel execution and should be populated +// by the execution functions and/or a kernel. For now we're just going to +// alias an ArraySpan. +type ExecResult = ArraySpan + +// ExecSpan represents a slice of inputs and is used to provide slices +// of input values to iterate over. +// +// Len is the length of the span (all elements in Values should either +// be scalar or an array with a length + offset of at least Len). +type ExecSpan struct { + Len int64 + Values []ExecValue +} + +func getNumBuffers(dt arrow.DataType) int { + switch dt.ID() { + case arrow.NULL, arrow.STRUCT, arrow.FIXED_SIZE_LIST: + return 1 + case arrow.BINARY, arrow.LARGE_BINARY, arrow.STRING, arrow.LARGE_STRING, arrow.DENSE_UNION: + return 3 + case arrow.EXTENSION: + return getNumBuffers(dt.(arrow.ExtensionType).StorageType()) + default: + return 2 + } +} + +// FillZeroLength fills an ArraySpan with the appropriate information for +// a Zero Length Array of the provided type. 
+func FillZeroLength(dt arrow.DataType, span *ArraySpan) { + span.Scratch[0], span.Scratch[1] = 0, 0 + span.Type = dt + span.Len = 0 + numBufs := getNumBuffers(dt) + for i := 0; i < numBufs; i++ { + span.Buffers[i].Buf = arrow.Uint64Traits.CastToBytes(span.Scratch[:])[:0] + span.Buffers[i].Owner = nil + } + + for i := numBufs; i < 3; i++ { + span.Buffers[i].Buf, span.Buffers[i].Owner = nil, nil + } + + nt, ok := dt.(arrow.NestedType) + if !ok { + if len(span.Children) > 0 { + span.Children = span.Children[:0] + } + return + } + + if cap(span.Children) >= len(nt.Fields()) { + span.Children = span.Children[:len(nt.Fields())] + } else { + span.Children = make([]ArraySpan, len(nt.Fields())) + } + for i, f := range nt.Fields() { + FillZeroLength(f.Type, &span.Children[i]) + } +} + +// PromoteExecSpanScalars promotes the values of the passed in ExecSpan +// from scalars to Arrays of length 1 for each value. +func PromoteExecSpanScalars(span ExecSpan) { + for i := range span.Values { + if span.Values[i].Scalar != nil { + span.Values[i].Array.FillFromScalar(span.Values[i].Scalar) + span.Values[i].Scalar = nil + } + } +} diff --git a/go/arrow/compute/internal/exec/span_test.go b/go/arrow/compute/internal/exec/span_test.go new file mode 100644 index 0000000000000..9187dae5d51be --- /dev/null +++ b/go/arrow/compute/internal/exec/span_test.go @@ -0,0 +1,834 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
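As an aside, the span lifecycle above can be summarized with a minimal sketch: wrap an existing arrow.ArrayData in a non-owning ArraySpan via SetMembers, then materialize a fresh arrow.Array via MakeArray, which fixes up the buffer refcounts. The test name below is hypothetical, it assumes the same imports as the test file that follows, and because the exec package is internal it only compiles from within the arrow module.

func TestSpanRoundTripSketch(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
	defer mem.AssertSize(t, 0)

	bldr := array.NewInt32Builder(mem)
	defer bldr.Release()
	bldr.AppendValues([]int32{1, 2, 3, 4}, nil)
	arr := bldr.NewInt32Array()
	defer arr.Release()

	// non-owning: SetMembers copies buffer pointers without retaining them
	var span exec.ArraySpan
	span.SetMembers(arr.Data())

	// MakeArray/MakeData bump the refcounts so the result owns its buffers
	out := span.MakeArray()
	defer out.Release()
	assert.True(t, array.Equal(arr, out))
}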
+ +package exec_test + +import ( + "reflect" + "strings" + "testing" + "unsafe" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/compute/internal/exec" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/endian" + "github.com/apache/arrow/go/v10/arrow/internal/testing/types" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/scalar" + "github.com/stretchr/testify/assert" +) + +func TestBufferSpan_SetBuffer(t *testing.T) { + type fields struct { + Buf []byte + Owner *memory.Buffer + SelfAlloc bool + } + type args struct { + buf *memory.Buffer + } + foo := []byte{0xde, 0xad, 0xbe, 0xef} + own := memory.NewBufferBytes(foo) + tests := []struct { + name string + fields fields + args args + }{ + {"simple", fields{SelfAlloc: true}, args{own}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + b := &exec.BufferSpan{ + Buf: tt.fields.Buf, + Owner: tt.fields.Owner, + SelfAlloc: tt.fields.SelfAlloc, + } + b.SetBuffer(tt.args.buf) + assert.Same(t, &foo[0], &b.Buf[0]) + assert.Same(t, own, b.Owner) + assert.False(t, b.SelfAlloc) + }) + } +} + +func TestBufferSpan_WrapBuffer(t *testing.T) { + type fields struct { + Buf []byte + Owner *memory.Buffer + SelfAlloc bool + } + type args struct { + buf *memory.Buffer + } + foo := []byte{0xde, 0xad, 0xbe, 0xef} + own := memory.NewBufferBytes(foo) + tests := []struct { + name string + fields fields + args args + }{ + {"simple", fields{SelfAlloc: false}, args{own}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + b := &exec.BufferSpan{ + Buf: tt.fields.Buf, + Owner: tt.fields.Owner, + SelfAlloc: tt.fields.SelfAlloc, + } + b.WrapBuffer(tt.args.buf) + assert.Same(t, &foo[0], &b.Buf[0]) + assert.Same(t, own, b.Owner) + assert.True(t, b.SelfAlloc) + }) + } +} + +func TestArraySpan_UpdateNullCount(t *testing.T) { + type fields struct { + Type arrow.DataType + Len int64 + Nulls int64 + Offset int64 + Buffers [3]exec.BufferSpan + Scratch [2]uint64 + Children []exec.ArraySpan + } + tests := []struct { + name string + fields fields + want int64 + }{ + {"known", fields{Nulls: 25}, 25}, + {"unknown", fields{ + Nulls: array.UnknownNullCount, + Len: 8, // 0b01101101 + Buffers: [3]exec.BufferSpan{{Buf: []byte{109}}, {}, {}}}, 3}, + {"unknown with offset", fields{ + Nulls: array.UnknownNullCount, + Len: 4, + Offset: 2, // 0b01101101 + Buffers: [3]exec.BufferSpan{{Buf: []byte{109}}, {}, {}}}, 1}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + a := &exec.ArraySpan{ + Type: tt.fields.Type, + Len: tt.fields.Len, + Nulls: tt.fields.Nulls, + Offset: tt.fields.Offset, + Buffers: tt.fields.Buffers, + Scratch: tt.fields.Scratch, + Children: tt.fields.Children, + } + if got := a.UpdateNullCount(); got != tt.want { + t.Errorf("ArraySpan.UpdateNullCount() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestArraySpan_Dictionary(t *testing.T) { + type fields struct { + Type arrow.DataType + Len int64 + Nulls int64 + Offset int64 + Buffers [3]exec.BufferSpan + Scratch [2]uint64 + Children []exec.ArraySpan + } + children := []exec.ArraySpan{{}} + tests := []struct { + name string + fields fields + want *exec.ArraySpan + }{ + {"basic", fields{Children: children}, &children[0]}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + a := &exec.ArraySpan{ + Type: tt.fields.Type, + Len: tt.fields.Len, + Nulls: tt.fields.Nulls, + Offset: 
tt.fields.Offset, + Buffers: tt.fields.Buffers, + Scratch: tt.fields.Scratch, + Children: tt.fields.Children, + } + if got := a.Dictionary(); !reflect.DeepEqual(got, tt.want) { + t.Errorf("ArraySpan.Dictionary() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestArraySpan_NumBuffers(t *testing.T) { + type fields struct { + Type arrow.DataType + Len int64 + Nulls int64 + Offset int64 + Buffers [3]exec.BufferSpan + Scratch [2]uint64 + Children []exec.ArraySpan + } + + arrow.RegisterExtensionType(types.NewUUIDType()) + defer arrow.UnregisterExtensionType("uuid") + + tests := []struct { + name string + fields fields + want int + }{ + {"null", fields{Type: arrow.Null}, 1}, + {"struct", fields{Type: arrow.StructOf()}, 1}, + {"fixed size list", fields{Type: arrow.FixedSizeListOf(4, arrow.PrimitiveTypes.Int32)}, 1}, + {"binary", fields{Type: arrow.BinaryTypes.Binary}, 3}, + {"large binary", fields{Type: arrow.BinaryTypes.LargeBinary}, 3}, + {"string", fields{Type: arrow.BinaryTypes.String}, 3}, + {"large string", fields{Type: arrow.BinaryTypes.LargeString}, 3}, + {"extension", fields{Type: types.NewUUIDType()}, 2}, + {"int32", fields{Type: arrow.PrimitiveTypes.Int32}, 2}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + a := &exec.ArraySpan{ + Type: tt.fields.Type, + Len: tt.fields.Len, + Nulls: tt.fields.Nulls, + Offset: tt.fields.Offset, + Buffers: tt.fields.Buffers, + Scratch: tt.fields.Scratch, + Children: tt.fields.Children, + } + if got := a.NumBuffers(); got != tt.want { + t.Errorf("ArraySpan.NumBuffers() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestArraySpan_MakeData(t *testing.T) { + type fields struct { + Type arrow.DataType + Len int64 + Nulls int64 + Offset int64 + Buffers [3]exec.BufferSpan + Scratch [2]uint64 + Children []exec.ArraySpan + } + + var ( + buf1 *memory.Buffer + ) + arrow.RegisterExtensionType(types.NewDictExtensionType()) + defer arrow.UnregisterExtensionType("dict-extension") + + tests := []struct { + name string + fields func(mem memory.Allocator) fields + want func(mem memory.Allocator) arrow.ArrayData + }{ + {"null type", func(mem memory.Allocator) fields { + return fields{ + Type: arrow.Null, + Len: 5, + Nulls: array.UnknownNullCount, + } + }, func(mem memory.Allocator) arrow.ArrayData { + return array.NewData(arrow.Null, 5, []*memory.Buffer{nil}, nil, 5, 0) + }}, + {"zero len", func(mem memory.Allocator) fields { + return fields{Type: arrow.PrimitiveTypes.Int32} + }, func(mem memory.Allocator) arrow.ArrayData { + return array.NewData(arrow.PrimitiveTypes.Int32, 0, []*memory.Buffer{nil, nil}, nil, 0, 0) + }}, + {"non-owning offset", func(mem memory.Allocator) fields { + ret := fields{ + Type: arrow.PrimitiveTypes.Int8, + Len: 4, + Nulls: 1, + Offset: 1, + } + buf1 = memory.NewResizableBuffer(mem) + buf1.Resize(1) + buf1.Bytes()[0] = 109 + ret.Buffers[0].SetBuffer(buf1) + ret.Buffers[1].SetBuffer(memory.NewBufferBytes([]byte{5, 5, 5, 5, 5})) + return ret + }, func(mem memory.Allocator) arrow.ArrayData { + // created in the above func, we release after constructing + // the NewData so the refcount is as expected + defer buf1.Release() + return array.NewData(arrow.PrimitiveTypes.Int8, 4, + []*memory.Buffer{buf1, memory.NewBufferBytes([]byte{5, 5, 5, 5, 5})}, nil, 1, 1) + }}, + {"self-alloc", func(mem memory.Allocator) fields { + ret := fields{ + Type: arrow.PrimitiveTypes.Int8, + Len: 4, + } + buf := memory.NewResizableBuffer(mem) + buf.Resize(1) + ret.Buffers[0].WrapBuffer(buf) + buf2 := memory.NewResizableBuffer(mem) + 
buf2.Resize(4) + ret.Buffers[1].WrapBuffer(buf2) + return ret + }, func(mem memory.Allocator) arrow.ArrayData { + buf := memory.NewResizableBuffer(mem) + buf.Resize(1) + defer buf.Release() + buf2 := memory.NewResizableBuffer(mem) + buf2.Resize(4) + defer buf2.Release() + return array.NewData(arrow.PrimitiveTypes.Int8, 4, []*memory.Buffer{buf, buf2}, nil, 0, 0) + }}, + {"with children", func(mem memory.Allocator) fields { + ret := fields{ + Type: arrow.ListOf(arrow.PrimitiveTypes.Int8), + Len: 1, + Children: []exec.ArraySpan{{ + Type: arrow.PrimitiveTypes.Int8, + Len: 4, + }}, + } + var offsets [8]byte + endian.Native.PutUint32(offsets[4:], 4) + ret.Buffers[1].SetBuffer(memory.NewBufferBytes(offsets[:])) + buf := memory.NewResizableBuffer(mem) + buf.Resize(4) + buf.Bytes()[0] = 1 + buf.Bytes()[1] = 2 + buf.Bytes()[2] = 3 + buf.Bytes()[3] = 4 + + ret.Children[0].Buffers[1].WrapBuffer(buf) + return ret + }, func(mem memory.Allocator) arrow.ArrayData { + buf := memory.NewResizableBuffer(mem) + buf.Resize(4) + buf.Bytes()[0] = 1 + buf.Bytes()[1] = 2 + buf.Bytes()[2] = 3 + buf.Bytes()[3] = 4 + defer buf.Release() + child := array.NewData(arrow.PrimitiveTypes.Int8, 4, []*memory.Buffer{nil, buf}, nil, 0, 0) + defer child.Release() + + var offsets [8]byte + endian.Native.PutUint32(offsets[4:], 4) + + return array.NewData(arrow.ListOf(arrow.PrimitiveTypes.Int8), 1, + []*memory.Buffer{nil, memory.NewBufferBytes(offsets[:])}, + []arrow.ArrayData{child}, 0, 0) + }}, + {"dict-extension-type", func(mem memory.Allocator) fields { + // dict-extension-type is dict(Index: int8, Value: string) + // so there should be an int8 in the arrayspan and + // a child of a string arrayspan in the first index of + // Children + ret := fields{ + Type: types.NewDictExtensionType(), + Len: 1, + Children: []exec.ArraySpan{{ + Type: arrow.BinaryTypes.String, + Len: 2, + }}, + } + + indices := memory.NewResizableBuffer(mem) + indices.Resize(1) + indices.Bytes()[0] = 1 + ret.Buffers[1].WrapBuffer(indices) + + offsets := memory.NewResizableBuffer(mem) + offsets.Resize(3 * arrow.Int32SizeBytes) + copy(offsets.Bytes(), arrow.Int32Traits.CastToBytes([]int32{0, 5, 10})) + + values := memory.NewResizableBuffer(mem) + values.Resize(len("HelloWorld")) + copy(values.Bytes(), []byte("HelloWorld")) + + nulls := memory.NewResizableBuffer(mem) + nulls.Resize(1) + nulls.Bytes()[0] = 3 + ret.Children[0].Buffers[0].WrapBuffer(nulls) + ret.Children[0].Buffers[1].WrapBuffer(offsets) + ret.Children[0].Buffers[2].WrapBuffer(values) + + return ret + }, func(mem memory.Allocator) arrow.ArrayData { + dict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "World"]`)) + defer dict.Release() + index, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[1]`)) + defer index.Release() + + out := array.NewData(types.NewDictExtensionType(), 1, []*memory.Buffer{nil, index.Data().Buffers()[1]}, nil, 0, 0) + out.SetDictionary(dict.Data()) + return out + }}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(t, 0) + + t.Run("MakeData", func(t *testing.T) { + f := tt.fields(mem) + a := &exec.ArraySpan{ + Type: f.Type, + Len: f.Len, + Nulls: f.Nulls, + Offset: f.Offset, + Buffers: f.Buffers, + Scratch: f.Scratch, + Children: f.Children, + } + got := a.MakeData() + want := tt.want(mem) + if !reflect.DeepEqual(got, want) { + t.Errorf("ArraySpan.MakeData() = %v, want %v", got, want) + } + want.Release() + 
got.Release() + }) + + t.Run("MakeArray", func(t *testing.T) { + f := tt.fields(mem) + a := &exec.ArraySpan{ + Type: f.Type, + Len: f.Len, + Nulls: f.Nulls, + Offset: f.Offset, + Buffers: f.Buffers, + Scratch: f.Scratch, + Children: f.Children, + } + arr := a.MakeArray() + want := tt.want(mem) + defer want.Release() + exp := array.MakeFromData(want) + + assert.Truef(t, array.Equal(arr, exp), "expected: %s\ngot: %s", exp, arr) + + exp.Release() + arr.Release() + }) + }) + } +} + +func TestArraySpan_SetSlice(t *testing.T) { + type fields struct { + Type arrow.DataType + Len int64 + Nulls int64 + Offset int64 + Buffers [3]exec.BufferSpan + Scratch [2]uint64 + Children []exec.ArraySpan + } + type args struct { + off int64 + length int64 + } + tests := []struct { + name string + fields fields + args args + wantNulls int64 + }{ + {"null type", fields{Type: arrow.Null}, args{5, 10}, 10}, + {"not-null type", fields{Type: arrow.PrimitiveTypes.Int8}, args{5, 10}, array.UnknownNullCount}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + a := &exec.ArraySpan{ + Type: tt.fields.Type, + Len: tt.fields.Len, + Nulls: tt.fields.Nulls, + Offset: tt.fields.Offset, + Buffers: tt.fields.Buffers, + Scratch: tt.fields.Scratch, + Children: tt.fields.Children, + } + a.SetSlice(tt.args.off, tt.args.length) + assert.Equal(t, tt.args.off, a.Offset) + assert.Equal(t, tt.args.length, a.Len) + assert.Equal(t, tt.wantNulls, a.Nulls) + }) + } +} + +func TestArraySpan_FillFromScalar(t *testing.T) { + var ( + expDecimalBuf [arrow.Decimal128SizeBytes]byte + expScratch [2]uint64 + ) + + endian.Native.PutUint64(expDecimalBuf[:], 1234) + endian.Native.PutUint32(arrow.Uint64Traits.CastToBytes(expScratch[:])[4:], 10) + + dict, _, _ := array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "World"]`)) + defer dict.Release() + + tests := []struct { + name string + args scalar.Scalar + exp exec.ArraySpan + }{ + {"null-type", + scalar.MakeNullScalar(arrow.Null), + exec.ArraySpan{Type: arrow.Null, Len: 1, Nulls: 1}}, + {"bool valid", + scalar.MakeScalar(true), + exec.ArraySpan{ + Type: arrow.FixedWidthTypes.Boolean, + Len: 1, + Nulls: 0, + Buffers: [3]exec.BufferSpan{{Buf: []byte{0x01}}, {Buf: []byte{0x01}}, {}}, + }}, + {"bool valid false", + scalar.MakeScalar(false), + exec.ArraySpan{ + Type: arrow.FixedWidthTypes.Boolean, + Len: 1, + Nulls: 0, + Buffers: [3]exec.BufferSpan{{Buf: []byte{0x01}}, {Buf: []byte{0x00}}, {}}, + }}, + {"primitive null", + scalar.MakeNullScalar(arrow.PrimitiveTypes.Int32), + exec.ArraySpan{ + Type: arrow.PrimitiveTypes.Int32, + Len: 1, + Nulls: 1, + Buffers: [3]exec.BufferSpan{{Buf: []byte{0x00}}, {Buf: []byte{0, 0, 0, 0}}, {}}, + }}, + {"decimal valid", + scalar.NewDecimal128Scalar(decimal128.FromU64(1234), &arrow.Decimal128Type{Precision: 12, Scale: 2}), + exec.ArraySpan{ + Type: &arrow.Decimal128Type{Precision: 12, Scale: 2}, + Len: 1, + Nulls: 0, + Buffers: [3]exec.BufferSpan{{Buf: []byte{0x01}}, {Buf: expDecimalBuf[:]}, {}}, + }}, + {"dictionary scalar", + scalar.NewDictScalar(scalar.NewInt8Scalar(1), dict), + exec.ArraySpan{ + Type: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String}, + Len: 1, + Nulls: 0, + Buffers: [3]exec.BufferSpan{{Buf: []byte{0x01}}, + {Buf: []byte{1}}, {}, + }, + Children: []exec.ArraySpan{{ + Type: arrow.BinaryTypes.String, + Len: 2, + Buffers: [3]exec.BufferSpan{ + {Buf: dict.NullBitmapBytes(), Owner: dict.Data().Buffers()[0]}, + {Buf: dict.Data().Buffers()[1].Bytes(), 
Owner: dict.Data().Buffers()[1]}, + {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, + }, + }}, + }, + }, + {"binary scalar", + scalar.NewBinaryScalar(dict.Data().Buffers()[2], arrow.BinaryTypes.String), + exec.ArraySpan{ + Type: arrow.BinaryTypes.String, + Len: 1, + Nulls: 0, + Scratch: expScratch, + Buffers: [3]exec.BufferSpan{ + {Buf: []byte{0x01}}, + {Buf: arrow.Uint64Traits.CastToBytes(expScratch[:1])}, + {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}}, + }, + }, + {"large binary", + scalar.NewLargeStringScalarFromBuffer(dict.Data().Buffers()[2]), + exec.ArraySpan{ + Type: arrow.BinaryTypes.LargeString, + Len: 1, + Nulls: 0, + Scratch: [2]uint64{0, 10}, + Buffers: [3]exec.BufferSpan{ + {Buf: []byte{0x01}}, + {Buf: arrow.Uint64Traits.CastToBytes([]uint64{0, 10})}, + {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}}, + }}, + {"fixed size binary", + scalar.NewFixedSizeBinaryScalar(dict.Data().Buffers()[2], &arrow.FixedSizeBinaryType{ByteWidth: 10}), + exec.ArraySpan{ + Type: &arrow.FixedSizeBinaryType{ByteWidth: 10}, + Len: 1, + Buffers: [3]exec.BufferSpan{ + {Buf: []byte{0x01}}, + {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, {}, + }, + }}, + {"map scalar null value", + scalar.MakeNullScalar(arrow.MapOf(arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String)), + exec.ArraySpan{ + Type: arrow.MapOf(arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String), + Len: 1, + Nulls: 1, + Buffers: [3]exec.BufferSpan{ + {Buf: []byte{0}}, + {Buf: []byte{0, 0, 0, 0, 0, 0, 0, 0}}, + {}, + }, + Children: []exec.ArraySpan{{ + Type: arrow.StructOf(arrow.Field{Name: "key", Type: arrow.PrimitiveTypes.Int8}, + arrow.Field{Name: "value", Type: arrow.BinaryTypes.String, Nullable: true}), + Len: 0, + Nulls: 0, + Buffers: [3]exec.BufferSpan{ + {Buf: []byte{}}, {}, {}, + }, + Children: []exec.ArraySpan{ + { + Type: arrow.PrimitiveTypes.Int8, + Buffers: [3]exec.BufferSpan{ + {Buf: []byte{}}, {Buf: []byte{}}, {}, + }, + }, + { + Type: arrow.BinaryTypes.String, + Buffers: [3]exec.BufferSpan{ + {Buf: []byte{}}, {Buf: []byte{}}, {Buf: []byte{}}, + }, + }, + }, + }}, + }}, + {"list scalar", + scalar.NewListScalarData(dict.Data()), + exec.ArraySpan{ + Type: arrow.ListOf(arrow.BinaryTypes.String), + Len: 1, + Scratch: [2]uint64{ + *(*uint64)(unsafe.Pointer(&[]int32{0, 2}[0])), + 0, + }, + Buffers: [3]exec.BufferSpan{ + {Buf: []byte{0x1}}, + {Buf: arrow.Int32Traits.CastToBytes([]int32{0, 2})}, + }, + Children: []exec.ArraySpan{{ + Type: arrow.BinaryTypes.String, + Len: 2, + Buffers: [3]exec.BufferSpan{ + {Buf: dict.NullBitmapBytes(), Owner: dict.Data().Buffers()[0]}, + {Buf: dict.Data().Buffers()[1].Bytes(), Owner: dict.Data().Buffers()[1]}, + {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, + }, + }}, + }, + }, + {"large list scalar", + scalar.NewLargeListScalarData(dict.Data()), + exec.ArraySpan{ + Type: arrow.LargeListOf(arrow.BinaryTypes.String), + Len: 1, + Scratch: [2]uint64{0, 2}, + Buffers: [3]exec.BufferSpan{ + {Buf: []byte{0x1}}, + {Buf: arrow.Int64Traits.CastToBytes([]int64{0, 2})}, + }, + Children: []exec.ArraySpan{{ + Type: arrow.BinaryTypes.String, + Len: 2, + Buffers: [3]exec.BufferSpan{ + {Buf: dict.NullBitmapBytes(), Owner: dict.Data().Buffers()[0]}, + {Buf: dict.Data().Buffers()[1].Bytes(), Owner: dict.Data().Buffers()[1]}, + {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, + }, + }}, + }, + }, + {"fixed size list", + scalar.NewFixedSizeListScalar(dict), + 
exec.ArraySpan{ + Type: arrow.FixedSizeListOf(2, arrow.BinaryTypes.String), + Len: 1, + Buffers: [3]exec.BufferSpan{ + {Buf: []byte{0x1}}, + {}, {}, + }, + Children: []exec.ArraySpan{{ + Type: arrow.BinaryTypes.String, + Len: 2, + Buffers: [3]exec.BufferSpan{ + {Buf: dict.NullBitmapBytes(), Owner: dict.Data().Buffers()[0]}, + {Buf: dict.Data().Buffers()[1].Bytes(), Owner: dict.Data().Buffers()[1]}, + {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, + }, + }}, + }, + }, + {"struct scalar", + func() scalar.Scalar { + s, _ := scalar.NewStructScalarWithNames([]scalar.Scalar{ + scalar.MakeScalar(int32(5)), scalar.MakeScalar(uint8(10)), + }, []string{"int32", "uint8"}) + return s + }(), + exec.ArraySpan{ + Type: arrow.StructOf( + arrow.Field{Name: "int32", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, + arrow.Field{Name: "uint8", Type: arrow.PrimitiveTypes.Uint8, Nullable: true}), + Buffers: [3]exec.BufferSpan{ + {Buf: []byte{0x1}}, {}, {}, + }, + Len: 1, + Children: []exec.ArraySpan{ + { + Type: arrow.PrimitiveTypes.Int32, + Len: 1, + Buffers: [3]exec.BufferSpan{ + {Buf: []byte{0x1}}, + {Buf: arrow.Int32Traits.CastToBytes([]int32{5})}, + {}, + }, + }, + { + Type: arrow.PrimitiveTypes.Uint8, + Len: 1, + Buffers: [3]exec.BufferSpan{ + {Buf: []byte{0x1}}, + {Buf: []byte{10}}, + {}, + }, + }, + }, + }, + }, + {"dense union scalar", + func() scalar.Scalar { + dt := arrow.UnionOf(arrow.DenseMode, []arrow.Field{ + {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, + {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, + }, []arrow.UnionTypeCode{3, 42, 43}) + return scalar.NewDenseUnionScalar(scalar.MakeScalar(uint64(25)), 42, dt.(*arrow.DenseUnionType)) + }(), + exec.ArraySpan{ + Type: arrow.UnionOf(arrow.DenseMode, []arrow.Field{ + {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, + {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, + }, []arrow.UnionTypeCode{3, 42, 43}), + Len: 1, + Scratch: [2]uint64{42, 1}, + Buffers: [3]exec.BufferSpan{{}, + {Buf: []byte{42}}, {Buf: arrow.Int32Traits.CastToBytes([]int32{0, 1})}, + }, + Children: []exec.ArraySpan{ + { + Type: arrow.BinaryTypes.String, + Buffers: [3]exec.BufferSpan{ + {Buf: []byte{}}, {Buf: []byte{}}, {Buf: []byte{}}, + }, + }, + { + Type: arrow.PrimitiveTypes.Uint64, + Len: 1, + Buffers: [3]exec.BufferSpan{ + {Buf: []byte{0x1}}, + {Buf: arrow.Uint64Traits.CastToBytes([]uint64{25})}, + {}, + }, + }, + { + Type: arrow.PrimitiveTypes.Uint64, + Buffers: [3]exec.BufferSpan{ + {Buf: []byte{}}, {Buf: []byte{}}, {}, + }, + }, + }, + }, + }, + {"sparse union", + func() scalar.Scalar { + dt := arrow.UnionOf(arrow.SparseMode, []arrow.Field{ + {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, + {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, + }, []arrow.UnionTypeCode{3, 42, 43}) + return scalar.NewSparseUnionScalarFromValue(scalar.MakeScalar(uint64(25)), 1, dt.(*arrow.SparseUnionType)) + }(), + exec.ArraySpan{ + Type: arrow.UnionOf(arrow.SparseMode, []arrow.Field{ + {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, + {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, + }, 
[]arrow.UnionTypeCode{3, 42, 43}), + Len: 1, + Scratch: [2]uint64{42, 0}, + Buffers: [3]exec.BufferSpan{{}, + {Buf: []byte{42}}, {}, + }, + Children: []exec.ArraySpan{ + { + Type: arrow.BinaryTypes.String, + Len: 1, + Nulls: 1, + Buffers: [3]exec.BufferSpan{ + {Buf: []byte{0x0}}, + {Buf: []byte{0, 0, 0, 0, 0, 0, 0, 0}}, + {}, + }, + }, + { + Type: arrow.PrimitiveTypes.Uint64, + Len: 1, + Buffers: [3]exec.BufferSpan{ + {Buf: []byte{0x1}}, + {Buf: arrow.Uint64Traits.CastToBytes([]uint64{25})}, + {}, + }, + }, + { + Type: arrow.PrimitiveTypes.Uint64, + Len: 1, + Nulls: 1, + Buffers: [3]exec.BufferSpan{ + {Buf: []byte{0x0}}, {Buf: []byte{0, 0, 0, 0, 0, 0, 0, 0}}, {}, + }, + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + a := &exec.ArraySpan{ + Nulls: array.UnknownNullCount, + Buffers: [3]exec.BufferSpan{{SelfAlloc: true, Owner: &memory.Buffer{}}, {SelfAlloc: true, Owner: &memory.Buffer{}}, {}}, + } + a.FillFromScalar(tt.args) + assert.Equal(t, tt.exp, *a) + }) + } +} diff --git a/go/arrow/compute/internal/exec/utils.go b/go/arrow/compute/internal/exec/utils.go new file mode 100644 index 0000000000000..d95480f540fa2 --- /dev/null +++ b/go/arrow/compute/internal/exec/utils.go @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package exec + +import ( + "unsafe" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/decimal256" + "github.com/apache/arrow/go/v10/arrow/float16" + "golang.org/x/exp/constraints" +) + +// IntTypes is a type constraint for raw values represented as signed +// integer types by Arrow. We aren't just using constraints.Signed +// because we don't want to include the raw `int` type here whose size +// changes based on the architecture (int32 on 32-bit architectures and +// int64 on 64-bit architectures). +// +// This will also cover types like MonthInterval or the time types +// as their underlying types are int32 and int64 which will get covered +// by using the ~ +type IntTypes interface { + ~int8 | ~int16 | ~int32 | ~int64 +} + +// UintTypes is a type constraint for raw values represented as unsigned +// integer types by Arrow. We aren't just using constraints.Unsigned +// because we don't want to include the raw `uint` type here whose size +// changes based on the architecture (uint32 on 32-bit architectures and +// uint64 on 64-bit architectures). We also don't want to include uintptr +type UintTypes interface { + ~uint8 | ~uint16 | ~uint32 | ~uint64 +} + +// FloatTypes is a type constraint for raw values for representing +// floating point values in Arrow. 
This consists of constraints.Float and + // float16.Num +type FloatTypes interface { + float16.Num | constraints.Float +} + +// DecimalTypes is a type constraint for raw values representing larger +// decimal type values in Arrow, specifically decimal128 and decimal256. +type DecimalTypes interface { + decimal128.Num | decimal256.Num +} + +// FixedWidthTypes is a type constraint for raw values in Arrow that +// can be represented as FixedWidth byte slices. Specifically this is for +// using Go generics to easily re-type a byte slice to a properly-typed +// slice. Booleans are excluded here since they are represented by Arrow +// as a bitmap and thus the buffer can't be just reinterpreted as a []bool +type FixedWidthTypes interface { + IntTypes | UintTypes | + FloatTypes | DecimalTypes | + arrow.DayTimeInterval | arrow.MonthDayNanoInterval +} + +// GetSpanValues returns a properly typed slice by reinterpreting +// the buffer at index i using unsafe.Slice. This will take into account +// the offset of the given ArraySpan. +func GetSpanValues[T FixedWidthTypes](span *ArraySpan, i int) []T { + ret := unsafe.Slice((*T)(unsafe.Pointer(&span.Buffers[i].Buf[0])), span.Offset+span.Len) + return ret[span.Offset:] +} + +func Min[T constraints.Ordered](a, b T) T { + if a < b { + return a + } + return b +} diff --git a/go/arrow/compute/no_exec.go b/go/arrow/compute/no_exec.go index d52c2e50adab2..f2f8b69fd6d32 100644 --- a/go/arrow/compute/no_exec.go +++ b/go/arrow/compute/no_exec.go @@ -25,8 +25,8 @@ package compute import ( "context" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/memory" ) // dummy function which always returns false when not loading the C++ lib @@ -40,6 +40,6 @@ func (boundRef) release() {} // when compiled without the c++ library (the build tags control whether it looks for it) // then we do not have pure go implementation of the expression binding currently. -func bindExprSchema(context.Context, memory.Allocator, Expression, *arrow.Schema) (boundRef, ValueDescr, int, Expression, error) { +func bindExprSchema(context.Context, memory.Allocator, Expression, *arrow.Schema) (boundRef, arrow.DataType, int, Expression, error) { panic("arrow/compute: bind expression not implemented") } diff --git a/go/arrow/compute/registry.go b/go/arrow/compute/registry.go new file mode 100644 index 0000000000000..b749cd9d0e6f3 --- /dev/null +++ b/go/arrow/compute/registry.go @@ -0,0 +1,201 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package compute + +import ( + "sync" + + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "golang.org/x/exp/maps" + "golang.org/x/exp/slices" +) + +type FunctionRegistry interface { + CanAddFunction(fn Function, allowOverwrite bool) bool + AddFunction(fn Function, allowOverwrite bool) bool + CanAddAlias(target, source string) bool + AddAlias(target, source string) bool + GetFunction(name string) (Function, bool) + GetFunctionNames() []string + NumFunctions() int + + canAddFuncName(string, bool) bool +} + +var ( + registry FunctionRegistry + once sync.Once +) + +func GetFunctionRegistry() FunctionRegistry { + once.Do(func() { + registry = NewRegistry() + // initialize the others + }) + return registry +} + +func NewRegistry() FunctionRegistry { + return &funcRegistry{ + nameToFunction: make(map[string]Function)} +} + +func NewChildRegistry(parent FunctionRegistry) FunctionRegistry { + return &funcRegistry{ + parent: parent.(*funcRegistry), + nameToFunction: make(map[string]Function)} +} + +type funcRegistry struct { + parent *funcRegistry + + mx sync.RWMutex + nameToFunction map[string]Function +} + +func (reg *funcRegistry) getLocker(add bool) sync.Locker { + if add { + return ®.mx + } + return reg.mx.RLocker() +} + +func (reg *funcRegistry) CanAddFunction(fn Function, allowOverwrite bool) bool { + if reg.parent != nil && !reg.parent.CanAddFunction(fn, allowOverwrite) { + return false + } + + return reg.doAddFunction(fn, allowOverwrite, false) +} + +func (reg *funcRegistry) AddFunction(fn Function, allowOverwrite bool) bool { + if reg.parent != nil && !reg.parent.CanAddFunction(fn, allowOverwrite) { + return false + } + + return reg.doAddFunction(fn, allowOverwrite, true) +} + +func (reg *funcRegistry) CanAddAlias(target, source string) bool { + if reg.parent != nil && !reg.parent.canAddFuncName(target, false) { + return false + } + return reg.doAddAlias(target, source, false) +} + +func (reg *funcRegistry) AddAlias(target, source string) bool { + if reg.parent != nil && !reg.parent.canAddFuncName(target, false) { + return false + } + + return reg.doAddAlias(target, source, true) +} + +func (reg *funcRegistry) GetFunction(name string) (Function, bool) { + reg.mx.RLock() + defer reg.mx.RUnlock() + + if fn, ok := reg.nameToFunction[name]; ok { + return fn, ok + } + + if reg.parent != nil { + return reg.parent.GetFunction(name) + } + + return nil, false +} + +func (reg *funcRegistry) GetFunctionNames() (out []string) { + if reg.parent != nil { + out = reg.parent.GetFunctionNames() + } else { + out = make([]string, 0, len(reg.nameToFunction)) + } + reg.mx.RLock() + defer reg.mx.RUnlock() + + out = append(out, maps.Keys(reg.nameToFunction)...) 
+ slices.Sort(out) + return +} + +func (reg *funcRegistry) NumFunctions() (n int) { + if reg.parent != nil { + n = reg.parent.NumFunctions() + } + reg.mx.RLock() + defer reg.mx.RUnlock() + return n + len(reg.nameToFunction) +} + +func (reg *funcRegistry) canAddFuncName(name string, allowOverwrite bool) bool { + if reg.parent != nil { + reg.parent.mx.RLock() + defer reg.parent.mx.RUnlock() + + if !reg.parent.canAddFuncName(name, allowOverwrite) { + return false + } + } + if !allowOverwrite { + _, ok := reg.nameToFunction[name] + return !ok + } + return true +} + +func (reg *funcRegistry) doAddFunction(fn Function, allowOverwrite bool, add bool) bool { + debug.Assert(fn.Validate() == nil, "invalid function") + + lk := reg.getLocker(add) + lk.Lock() + defer lk.Unlock() + + name := fn.Name() + if !reg.canAddFuncName(name, allowOverwrite) { + return false + } + + if add { + reg.nameToFunction[name] = fn + } + return true +} + +func (reg *funcRegistry) doAddAlias(target, source string, add bool) bool { + // source name must exist in the registry or the parent + // check outside the mutex, in case GetFunction has a mutex + // acquisition + fn, ok := reg.GetFunction(source) + if !ok { + return false + } + + lk := reg.getLocker(add) + lk.Lock() + defer lk.Unlock() + + if !reg.canAddFuncName(target, false) { + return false + } + + if add { + reg.nameToFunction[target] = fn + } + return true +} diff --git a/go/arrow/compute/registry_test.go b/go/arrow/compute/registry_test.go new file mode 100644 index 0000000000000..747a1b450b201 --- /dev/null +++ b/go/arrow/compute/registry_test.go @@ -0,0 +1,180 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
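As a usage note for the registry added above, the following minimal sketch shows the intended overlay behavior: a child registry can add and alias functions that are visible through it without ever mutating its parent. The helper name registerSketch is hypothetical; fn can be any concrete compute.Function implementation (such as the mock used in the tests that follow).

func registerSketch(fn compute.Function) (compute.Function, bool) {
	// child registries overlay the global registry: additions are visible
	// through the child but are never written back to the parent
	reg := compute.NewChildRegistry(compute.GetFunctionRegistry())
	if !reg.AddFunction(fn, false) { // false: refuse to overwrite an existing name
		return nil, false
	}
	reg.AddAlias(fn.Name()+"_alias", fn.Name()) // alias resolves to the same Function
	return reg.GetFunction(fn.Name() + "_alias")
}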
+ +package compute_test + +import ( + "context" + "errors" + "testing" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/compute" + "github.com/apache/arrow/go/v10/arrow/compute/internal/exec" + "github.com/stretchr/testify/assert" + "golang.org/x/exp/slices" +) + +var registry compute.FunctionRegistry + +func init() { + // make tests fail if there's a problem initializing the global + // function registry + registry = compute.GetFunctionRegistry() +} + +type mockFn struct { + name string +} + +func (m *mockFn) Name() string { return m.name } +func (*mockFn) Kind() compute.FuncKind { return compute.FuncScalar } +func (*mockFn) Arity() compute.Arity { return compute.Unary() } +func (*mockFn) Doc() compute.FunctionDoc { return compute.EmptyFuncDoc } +func (*mockFn) NumKernels() int { return 0 } +func (*mockFn) Execute(context.Context, compute.FunctionOptions, ...compute.Datum) (compute.Datum, error) { + return nil, errors.New("not implemented") +} +func (*mockFn) DefaultOptions() compute.FunctionOptions { return nil } +func (*mockFn) Validate() error { return nil } +func (*mockFn) DispatchExact(...arrow.DataType) (exec.Kernel, error) { return nil, nil } +func (*mockFn) DispatchBest(...arrow.DataType) (exec.Kernel, error) { return nil, nil } + +func TestRegistryBasics(t *testing.T) { + tests := []struct { + name string + factory func() compute.FunctionRegistry + nfuncs int + expectedNames []string + }{ + {"default", compute.NewRegistry, 0, []string{}}, + {"nested", func() compute.FunctionRegistry { + return compute.NewChildRegistry(registry) + }, registry.NumFunctions(), registry.GetFunctionNames()}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + registry := tt.factory() + assert.Equal(t, tt.nfuncs, registry.NumFunctions()) + + fn := &mockFn{name: "f1"} + assert.True(t, registry.AddFunction(fn, false)) + assert.Equal(t, tt.nfuncs+1, registry.NumFunctions()) + + f1, ok := registry.GetFunction("f1") + assert.True(t, ok) + assert.Same(t, fn, f1) + + // non-existent + _, ok = registry.GetFunction("f2") + assert.False(t, ok) + + // name collision + f2 := &mockFn{name: "f1"} + assert.False(t, registry.AddFunction(f2, false)) + + // allow overwriting + assert.True(t, registry.AddFunction(f2, true)) + f1, ok = registry.GetFunction("f1") + assert.True(t, ok) + assert.Same(t, f2, f1) + + expected := append(tt.expectedNames, "f1") + slices.Sort(expected) + assert.Equal(t, expected, registry.GetFunctionNames()) + + // aliases + assert.False(t, registry.AddAlias("f33", "f3")) // doesn't exist + assert.True(t, registry.AddAlias("f11", "f1")) + f1, ok = registry.GetFunction("f11") + assert.True(t, ok) + assert.Same(t, f2, f1) + }) + } +} + +func TestRegistry(t *testing.T) { + defaultRegistry := registry + t.Run("RegisterTempFunctions", func(t *testing.T) { + const rounds = 3 + for i := 0; i < rounds; i++ { + registry := compute.NewChildRegistry(registry) + for _, v := range []string{"f1", "f2"} { + fn := &mockFn{name: v} + assert.True(t, registry.CanAddFunction(fn, false)) + assert.True(t, registry.AddFunction(fn, false)) + assert.False(t, registry.CanAddFunction(fn, false)) + assert.False(t, registry.AddFunction(fn, false)) + assert.True(t, defaultRegistry.CanAddFunction(fn, false)) + } + } + }) + + t.Run("RegisterTempAliases", func(t *testing.T) { + funcNames := defaultRegistry.GetFunctionNames() + const rounds = 3 + for i := 0; i < rounds; i++ { + registry := compute.NewChildRegistry(registry) + for _, funcName := range funcNames { + alias := 
"alias_of_" + funcName + _, ok := registry.GetFunction(alias) + assert.False(t, ok) + assert.True(t, registry.CanAddAlias(alias, funcName)) + assert.True(t, registry.AddAlias(alias, funcName)) + _, ok = registry.GetFunction(alias) + assert.True(t, ok) + _, ok = defaultRegistry.GetFunction(funcName) + assert.True(t, ok) + _, ok = defaultRegistry.GetFunction(alias) + assert.False(t, ok) + } + } + }) +} + +func TestRegistryRegisterNestedFunction(t *testing.T) { + defaultRegistry := registry + func1 := &mockFn{name: "f1"} + func2 := &mockFn{name: "f2"} + + const rounds = 3 + for i := 0; i < rounds; i++ { + registry1 := compute.NewChildRegistry(defaultRegistry) + + assert.True(t, registry1.CanAddFunction(func1, false)) + assert.True(t, registry1.AddFunction(func1, false)) + for j := 0; j < rounds; j++ { + registry2 := compute.NewChildRegistry(registry1) + assert.False(t, registry2.CanAddFunction(func1, false)) + assert.False(t, registry2.AddFunction(func1, false)) + + assert.True(t, registry2.CanAddFunction(func2, false)) + assert.True(t, registry2.AddFunction(func2, false)) + assert.False(t, registry2.CanAddFunction(func2, false)) + assert.False(t, registry2.AddFunction(func2, false)) + assert.True(t, defaultRegistry.CanAddFunction(func2, false)) + + assert.False(t, registry2.CanAddAlias("f1", "f2")) + assert.False(t, registry2.AddAlias("f1", "f2")) + assert.False(t, registry2.AddAlias("f1", "f1")) + } + assert.False(t, registry1.CanAddFunction(func1, false)) + assert.False(t, registry1.AddFunction(func1, false)) + assert.True(t, registry1.CanAddAlias("f2", "f1")) + assert.True(t, defaultRegistry.CanAddFunction(func1, false)) + } +} diff --git a/go/arrow/compute/utils.go b/go/arrow/compute/utils.go index 8af342a073814..32ad97b586df2 100644 --- a/go/arrow/compute/utils.go +++ b/go/arrow/compute/utils.go @@ -20,8 +20,8 @@ import ( "io" "math" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" "golang.org/x/xerrors" ) diff --git a/go/arrow/compute/valueshape_string.go b/go/arrow/compute/valueshape_string.go deleted file mode 100644 index 1381d2ed3935e..0000000000000 --- a/go/arrow/compute/valueshape_string.go +++ /dev/null @@ -1,25 +0,0 @@ -// Code generated by "stringer -type=ValueShape -linecomment"; DO NOT EDIT. - -package compute - -import "strconv" - -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. 
- var x [1]struct{} - _ = x[ShapeAny-0] - _ = x[ShapeArray-1] - _ = x[ShapeScalar-2] -} - -const _ValueShape_name = "anyarrayscalar" - -var _ValueShape_index = [...]uint8{0, 3, 8, 14} - -func (i ValueShape) String() string { - if i < 0 || i >= ValueShape(len(_ValueShape_index)-1) { - return "ValueShape(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _ValueShape_name[_ValueShape_index[i]:_ValueShape_index[i+1]] -} diff --git a/go/arrow/csv/common.go b/go/arrow/csv/common.go index 0f1b9c4bb22de..326c7c6f019e3 100644 --- a/go/arrow/csv/common.go +++ b/go/arrow/csv/common.go @@ -22,8 +22,8 @@ import ( "errors" "fmt" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/memory" ) var ( @@ -159,6 +159,21 @@ func WithNullWriter(null string) Option { } } +// WithBoolWriter overrides the default bool formatter with a function that returns +// a string representation of bool states, e.g. True, False, 1, 0 +func WithBoolWriter(fmtr func(bool) string) Option { + return func(cfg config) { + switch cfg := cfg.(type) { + case *Writer: + if fmtr != nil { + cfg.boolFormatter = fmtr + } + default: + panic(fmt.Errorf("arrow/csv: WithBoolWriter unknown config type %T", cfg)) + } + } +} + func validate(schema *arrow.Schema) { for i, f := range schema.Fields() { switch ft := f.Type.(type) { @@ -168,6 +183,7 @@ func validate(schema *arrow.Schema) { case *arrow.Float32Type, *arrow.Float64Type: case *arrow.StringType: case *arrow.TimestampType: + case *arrow.Date32Type, *arrow.Date64Type: default: panic(fmt.Errorf("arrow/csv: field %d (%s) has invalid data type %T", i, f.Name, ft)) } diff --git a/go/arrow/csv/reader.go b/go/arrow/csv/reader.go index 1278e50347efa..091aa85e960c7 100644 --- a/go/arrow/csv/reader.go +++ b/go/arrow/csv/reader.go @@ -25,10 +25,10 @@ import ( "sync" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" ) // Reader wraps encoding/csv.Reader and creates array.Records from a schema. diff --git a/go/arrow/csv/reader_test.go b/go/arrow/csv/reader_test.go index a4696f13e7516..9b735ba6ddc13 100644 --- a/go/arrow/csv/reader_test.go +++ b/go/arrow/csv/reader_test.go @@ -23,9 +23,9 @@ import ( "log" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/csv" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/csv" + "github.com/apache/arrow/go/v10/arrow/memory" ) func Example() { diff --git a/go/arrow/csv/writer.go b/go/arrow/csv/writer.go index 83b8e1e0734da..82973b9b26a89 100644 --- a/go/arrow/csv/writer.go +++ b/go/arrow/csv/writer.go @@ -22,17 +22,18 @@ import ( "strconv" "sync" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" ) // Writer wraps encoding/csv.Writer and writes arrow.Record based on a schema.
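The new WithBoolWriter option and the Date32/Date64 column support combine as follows; a minimal usage sketch assuming the v10 import paths used throughout this diff (field names and values are arbitrary):

```go
package main

import (
	"os"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/csv"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func main() {
	schema := arrow.NewSchema([]arrow.Field{
		{Name: "ok", Type: arrow.FixedWidthTypes.Boolean},
		{Name: "d", Type: arrow.FixedWidthTypes.Date32},
	}, nil)

	b := array.NewRecordBuilder(memory.DefaultAllocator, schema)
	defer b.Release()
	b.Field(0).(*array.BooleanBuilder).AppendValues([]bool{true, false}, nil)
	b.Field(1).(*array.Date32Builder).AppendValues([]arrow.Date32{19304, 20304}, nil)
	rec := b.NewRecord()
	defer rec.Release()

	// Emit "Y"/"N" instead of the default strconv.FormatBool output.
	w := csv.NewWriter(os.Stdout, schema,
		csv.WithHeader(true),
		csv.WithBoolWriter(func(v bool) string {
			if v {
				return "Y"
			}
			return "N"
		}),
	)
	if err := w.Write(rec); err != nil {
		panic(err)
	}
	if err := w.Flush(); err != nil {
		panic(err)
	}
}
```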
type Writer struct { - w *csv.Writer - schema *arrow.Schema - header bool - once sync.Once - nullValue string + boolFormatter func(bool) string + header bool + nullValue string + once sync.Once + schema *arrow.Schema + w *csv.Writer } // NewWriter returns a writer that writes arrow.Records to the CSV file @@ -44,9 +45,10 @@ func NewWriter(w io.Writer, schema *arrow.Schema, opts ...Option) *Writer { validate(schema) ww := &Writer{ - w: csv.NewWriter(w), - schema: schema, - nullValue: "NULL", // override by passing WithNullWriter() as an option + boolFormatter: strconv.FormatBool, // override by passing WithBoolWriter() as an option + nullValue: "NULL", // override by passing WithNullWriter() as an option + schema: schema, + w: csv.NewWriter(w), } for _, opt := range opts { opt(ww) @@ -84,7 +86,7 @@ func (w *Writer) Write(record arrow.Record) error { arr := col.(*array.Boolean) for i := 0; i < arr.Len(); i++ { if arr.IsValid(i) { - recs[i][j] = strconv.FormatBool(arr.Value(i)) + recs[i][j] = w.boolFormatter(arr.Value(i)) } else { recs[i][j] = w.nullValue } @@ -188,6 +190,35 @@ func (w *Writer) Write(record arrow.Record) error { recs[i][j] = w.nullValue } } + case *arrow.Date32Type: + arr := col.(*array.Date32) + for i := 0; i < arr.Len(); i++ { + if arr.IsValid(i) { + recs[i][j] = arr.Value(i).FormattedString() + } else { + recs[i][j] = w.nullValue + } + } + case *arrow.Date64Type: + arr := col.(*array.Date64) + for i := 0; i < arr.Len(); i++ { + if arr.IsValid(i) { + recs[i][j] = arr.Value(i).FormattedString() + } else { + recs[i][j] = w.nullValue + } + } + + case *arrow.TimestampType: + arr := col.(*array.Timestamp) + t := w.schema.Field(j).Type.(*arrow.TimestampType) + for i := 0; i < arr.Len(); i++ { + if arr.IsValid(i) { + recs[i][j] = arr.Value(i).ToTime(t.Unit).Format("2006-01-02 15:04:05.999999999") + } else { + recs[i][j] = w.nullValue + } + } } } diff --git a/go/arrow/csv/writer_test.go b/go/arrow/csv/writer_test.go index e9cd417d28e03..f358e9fa236bf 100644 --- a/go/arrow/csv/writer_test.go +++ b/go/arrow/csv/writer_test.go @@ -17,17 +17,23 @@ package csv_test import ( + "bufio" "bytes" + ecsv "encoding/csv" "fmt" "io/ioutil" "log" - "strings" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/csv" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/csv" + "github.com/apache/arrow/go/v10/arrow/memory" +) + +const ( + separator = ';' + nullVal = "null" ) func Example_writer() { @@ -121,25 +127,71 @@ func Example_writer() { // rec[9]["str"]: ["str-9"] } +var ( + fullData = [][]string{ + {"bool", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", "f32", "f64", "str", "ts_s", "d32", "d64"}, + {"true", "-1", "-1", "-1", "-1", "0", "0", "0", "0", "0", "0", "str-0", "2014-07-28 15:04:05", "2017-05-18", "2028-04-26"}, + {"false", "0", "0", "0", "0", "1", "1", "1", "1", "0.1", "0.1", "str-1", "2016-09-08 15:04:05", "2022-11-08", "2031-06-28"}, + {"true", "1", "1", "1", "1", "2", "2", "2", "2", "0.2", "0.2", "str-2", "2021-09-18 15:04:05", "2025-08-04", "2034-08-28"}, + {nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal}, + } + bananaData = [][]string{ + {"bool", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64", "f32", "f64", "str", "ts_s", "d32", "d64"}, + {"BANANA", "-1", "-1", "-1", "-1", "0", 
"0", "0", "0", "0", "0", "str-0", "2014-07-28 15:04:05", "2017-05-18", "2028-04-26"}, + {"MANGO", "0", "0", "0", "0", "1", "1", "1", "1", "0.1", "0.1", "str-1", "2016-09-08 15:04:05", "2022-11-08", "2031-06-28"}, + {"BANANA", "1", "1", "1", "1", "2", "2", "2", "2", "0.2", "0.2", "str-2", "2021-09-18 15:04:05", "2025-08-04", "2034-08-28"}, + {nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal, nullVal}, + } +) + func TestCSVWriter(t *testing.T) { tests := []struct { - name string - header bool - }{{ - name: "Noheader", - header: false, - }, { - name: "Header", - header: true, - }} + name string + header bool + boolFormat func(bool) string + data [][]string + }{ + { + name: "Noheader", + header: false, + data: fullData[1:], + }, + { + name: "header", + header: true, + data: fullData, + }, + { + name: "Header with bool fmt", + header: true, + boolFormat: func(b bool) string { + if b { + return "BANANA" + } + return "MANGO" + }, + data: bananaData, + }} for _, test := range tests { t.Run(test.name, func(t *testing.T) { - testCSVWriter(t, test.header) + testCSVWriter(t, test.data, test.header, test.boolFormat) }) } } -func testCSVWriter(t *testing.T, writeHeader bool) { +func genTimestamps(unit arrow.TimeUnit) []arrow.Timestamp { + out := []arrow.Timestamp{} + for _, input := range []string{"2014-07-28 15:04:05", "2016-09-08 15:04:05", "2021-09-18 15:04:05"} { + ts, err := arrow.TimestampFromString(input, unit) + if err != nil { + panic(fmt.Errorf("could not convert %s to arrow.Timestamp err=%s", input, err)) + } + out = append(out, ts) + } + return out +} + +func testCSVWriter(t *testing.T, data [][]string, writeHeader bool, fmtr func(bool) string) { f := new(bytes.Buffer) pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) @@ -158,6 +210,9 @@ func testCSVWriter(t *testing.T, writeHeader bool) { {Name: "f32", Type: arrow.PrimitiveTypes.Float32}, {Name: "f64", Type: arrow.PrimitiveTypes.Float64}, {Name: "str", Type: arrow.BinaryTypes.String}, + {Name: "ts_s", Type: arrow.FixedWidthTypes.Timestamp_s}, + {Name: "d32", Type: arrow.FixedWidthTypes.Date32}, + {Name: "d64", Type: arrow.FixedWidthTypes.Date64}, }, nil, ) @@ -177,6 +232,9 @@ func testCSVWriter(t *testing.T, writeHeader bool) { b.Field(9).(*array.Float32Builder).AppendValues([]float32{0.0, 0.1, 0.2}, nil) b.Field(10).(*array.Float64Builder).AppendValues([]float64{0.0, 0.1, 0.2}, nil) b.Field(11).(*array.StringBuilder).AppendValues([]string{"str-0", "str-1", "str-2"}, nil) + b.Field(12).(*array.TimestampBuilder).AppendValues(genTimestamps(arrow.Second), nil) + b.Field(13).(*array.Date32Builder).AppendValues([]arrow.Date32{17304, 19304, 20304}, nil) + b.Field(14).(*array.Date64Builder).AppendValues([]arrow.Date64{1840400000000, 1940400000000, 2040400000000}, nil) for _, field := range b.Fields() { field.AppendNull() @@ -186,10 +244,11 @@ func testCSVWriter(t *testing.T, writeHeader bool) { defer rec.Release() w := csv.NewWriter(f, schema, - csv.WithComma(';'), + csv.WithComma(separator), csv.WithCRLF(false), csv.WithHeader(writeHeader), - csv.WithNullWriter("null"), + csv.WithNullWriter(nullVal), + csv.WithBoolWriter(fmtr), ) err := w.Write(rec) if err != nil { @@ -206,19 +265,48 @@ func testCSVWriter(t *testing.T, writeHeader bool) { t.Fatal(err) } - want := `true;-1;-1;-1;-1;0;0;0;0;0;0;str-0 -false;0;0;0;0;1;1;1;1;0.1;0.1;str-1 -true;1;1;1;1;2;2;2;2;0.2;0.2;str-2 -null;null;null;null;null;null;null;null;null;null;null;null -` + bdata, err := 
expectedOutout(data) + if err != nil { + t.Fatal(err) + } + + if err = matchCSV(bdata.Bytes(), f.Bytes()); err != nil { + t.Fatal(err) + } +} + +func expectedOutout(data [][]string) (*bytes.Buffer, error) { + b := bytes.NewBuffer(nil) + w := ecsv.NewWriter(b) + w.Comma = separator + w.UseCRLF = false + return b, w.WriteAll(data) +} + +func matchCSV(expected, test []byte) error { + expectedScanner := bufio.NewScanner(bytes.NewReader(expected)) + testScanner := bufio.NewScanner(bytes.NewReader(test)) + line := 0 + for expectedScanner.Scan() && testScanner.Scan() { + if expectedScanner.Text() != testScanner.Text() { + return fmt.Errorf("expected=%s != test=%s line=%d", expectedScanner.Text(), testScanner.Text(), line) + } + line++ + } + + if expectedScanner.Scan() { + return fmt.Errorf("expected unprocessed:%s", expectedScanner.Text()) + } - if writeHeader { - want = "bool;i8;i16;i32;i64;u8;u16;u32;u64;f32;f64;str\n" + want + if testScanner.Scan() { + return fmt.Errorf("test unprocessed:%s", testScanner.Text()) } - if got, want := f.String(), want; strings.Compare(got, want) != 0 { - t.Fatalf("invalid output:\ngot=%s\nwant=%s\n", got, want) + if err := expectedScanner.Err(); err != nil { + return err } + + return testScanner.Err() } func BenchmarkWrite(b *testing.B) { diff --git a/go/arrow/datatype.go b/go/arrow/datatype.go index 1503f655e7ce7..4a7915f9b301a 100644 --- a/go/arrow/datatype.go +++ b/go/arrow/datatype.go @@ -17,9 +17,11 @@ package arrow import ( + "fmt" "hash/maphash" + "strings" - "github.com/apache/arrow/go/v9/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/internal/debug" ) // Type is a logical type. They can be expressed as @@ -161,6 +163,7 @@ const ( // DataType is the representation of an Arrow type. type DataType interface { + fmt.Stringer ID() Type // Name is name of the data type. Name() string @@ -168,6 +171,21 @@ type DataType interface { Layout() DataTypeLayout } +// TypesToString is a convenience function to create a list of types +// which are comma delimited as a string +func TypesToString(types []DataType) string { + var b strings.Builder + b.WriteByte('(') + for i, t := range types { + if i != 0 { + b.WriteString(", ") + } + b.WriteString(t.String()) + } + b.WriteByte(')') + return b.String() +} + // FixedWidthDataType is the representation of an Arrow type that // requires a fixed number of bits in memory for each element. 
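A short sketch of the new TypesToString helper, which relies on DataType now embedding fmt.Stringer; the expected output in the comment follows from the String() methods defined in this package:

```go
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
)

func main() {
	types := []arrow.DataType{
		arrow.PrimitiveTypes.Int32,
		arrow.BinaryTypes.String,
		arrow.BinaryTypes.LargeBinary,
	}
	// Each element is rendered via String(), joined with ", " and
	// wrapped in parentheses: "(int32, utf8, large_binary)".
	fmt.Println(arrow.TypesToString(types))
}
```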
type FixedWidthDataType interface { @@ -181,6 +199,11 @@ type BinaryDataType interface { binary() } +type OffsetsDataType interface { + DataType + OffsetTypeTraits() OffsetTraits +} + func HashType(seed maphash.Seed, dt DataType) uint64 { var h maphash.Hash h.SetSeed(seed) @@ -286,6 +309,24 @@ func IsBaseBinary(t Type) bool { return false } +// IsBinaryLike returns true for only BINARY and STRING +func IsBinaryLike(t Type) bool { + switch t { + case BINARY, STRING: + return true + } + return false +} + +// IsLargeBinaryLike returns true for only LARGE_BINARY and LARGE_STRING +func IsLargeBinaryLike(t Type) bool { + switch t { + case LARGE_BINARY, LARGE_STRING: + return true + } + return false +} + // IsFixedSizeBinary returns true for Decimal128/256 and FixedSizeBinary func IsFixedSizeBinary(t Type) bool { switch t { @@ -294,3 +335,39 @@ func IsFixedSizeBinary(t Type) bool { } return false } + +// IsDecimal returns true for Decimal128 and Decimal256 +func IsDecimal(t Type) bool { + switch t { + case DECIMAL128, DECIMAL256: + return true + } + return false +} + +// IsUnion returns true for Sparse and Dense Unions +func IsUnion(t Type) bool { + switch t { + case DENSE_UNION, SPARSE_UNION: + return true + } + return false +} + +// IsListLike returns true for List, LargeList, FixedSizeList, and Map +func IsListLike(t Type) bool { + switch t { + case LIST, LARGE_LIST, FIXED_SIZE_LIST, MAP: + return true + } + return false +} + +// IsNested returns true for List, LargeList, FixedSizeList, Map, Struct, and Unions +func IsNested(t Type) bool { + switch t { + case LIST, LARGE_LIST, FIXED_SIZE_LIST, MAP, STRUCT, SPARSE_UNION, DENSE_UNION: + return true + } + return false +} diff --git a/go/arrow/datatype_binary.go b/go/arrow/datatype_binary.go index d77e7a64bd153..fa6513693f8bd 100644 --- a/go/arrow/datatype_binary.go +++ b/go/arrow/datatype_binary.go @@ -16,6 +16,17 @@ package arrow +// OffsetTraits is a convenient interface over the various type traits +// constants such as arrow.Int32Traits allowing types with offsets, like +// BinaryType, StringType, LargeBinaryType and LargeStringType to have +// a method to return information about their offset type and how many bytes +// would be required to allocate an offset buffer for them. +type OffsetTraits interface { + // BytesRequired returns the number of bytes required to be allocated + // in order to hold the passed in number of elements of this type. 
+ BytesRequired(int) int +} + type BinaryType struct{} func (t *BinaryType) ID() Type { return BINARY } @@ -27,6 +38,7 @@ func (t *BinaryType) Layout() DataTypeLayout { return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Int32SizeBytes), SpecVariableWidth()}} } +func (t *BinaryType) OffsetTypeTraits() OffsetTraits { return Int32Traits } type StringType struct{} @@ -39,13 +51,44 @@ func (t *StringType) Layout() DataTypeLayout { return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Int32SizeBytes), SpecVariableWidth()}} } +func (t *StringType) OffsetTypeTraits() OffsetTraits { return Int32Traits } + +type LargeBinaryType struct{} + +func (t *LargeBinaryType) ID() Type { return LARGE_BINARY } +func (t *LargeBinaryType) Name() string { return "large_binary" } +func (t *LargeBinaryType) String() string { return "large_binary" } +func (t *LargeBinaryType) binary() {} +func (t *LargeBinaryType) Fingerprint() string { return typeFingerprint(t) } +func (t *LargeBinaryType) Layout() DataTypeLayout { + return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), + SpecFixedWidth(Int64SizeBytes), SpecVariableWidth()}} +} +func (t *LargeBinaryType) OffsetTypeTraits() OffsetTraits { return Int64Traits } + +type LargeStringType struct{} + +func (t *LargeStringType) ID() Type { return LARGE_STRING } +func (t *LargeStringType) Name() string { return "large_utf8" } +func (t *LargeStringType) String() string { return "large_utf8" } +func (t *LargeStringType) binary() {} +func (t *LargeStringType) Fingerprint() string { return typeFingerprint(t) } +func (t *LargeStringType) Layout() DataTypeLayout { + return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), + SpecFixedWidth(Int64SizeBytes), SpecVariableWidth()}} +} +func (t *LargeStringType) OffsetTypeTraits() OffsetTraits { return Int64Traits } var ( BinaryTypes = struct { - Binary BinaryDataType - String BinaryDataType + Binary BinaryDataType + String BinaryDataType + LargeBinary BinaryDataType + LargeString BinaryDataType }{ - Binary: &BinaryType{}, - String: &StringType{}, + Binary: &BinaryType{}, + String: &StringType{}, + LargeBinary: &LargeBinaryType{}, + LargeString: &LargeStringType{}, } ) diff --git a/go/arrow/datatype_binary_test.go b/go/arrow/datatype_binary_test.go index 092c3ff6b0aa5..fd486f11dc594 100644 --- a/go/arrow/datatype_binary_test.go +++ b/go/arrow/datatype_binary_test.go @@ -19,7 +19,7 @@ package arrow_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow" + "github.com/apache/arrow/go/v10/arrow" ) func TestBinaryType(t *testing.T) { @@ -51,3 +51,33 @@ func TestStringType(t *testing.T) { t.Fatalf("invalid string type stringer. got=%v, want=%v", got, want) } } + +func TestLargeBinaryType(t *testing.T) { + var nt *arrow.LargeBinaryType + if got, want := nt.ID(), arrow.LARGE_BINARY; got != want { + t.Fatalf("invalid binary type id. got=%v, want=%v", got, want) + } + + if got, want := nt.Name(), "large_binary"; got != want { + t.Fatalf("invalid binary type name. got=%v, want=%v", got, want) + } + + if got, want := nt.String(), "large_binary"; got != want { + t.Fatalf("invalid binary type stringer. got=%v, want=%v", got, want) + } +} + +func TestLargeStringType(t *testing.T) { + var nt *arrow.LargeStringType + if got, want := nt.ID(), arrow.LARGE_STRING; got != want { + t.Fatalf("invalid string type id. got=%v, want=%v", got, want) + } + + if got, want := nt.Name(), "large_utf8"; got != want { + t.Fatalf("invalid string type name. 
got=%v, want=%v", got, want) + } + + if got, want := nt.String(), "large_utf8"; got != want { + t.Fatalf("invalid string type stringer. got=%v, want=%v", got, want) + } +} diff --git a/go/arrow/datatype_extension_test.go b/go/arrow/datatype_extension_test.go index d6de8bbe4f471..0352ad758a61f 100644 --- a/go/arrow/datatype_extension_test.go +++ b/go/arrow/datatype_extension_test.go @@ -20,8 +20,8 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/internal/testing/types" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/internal/testing/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) diff --git a/go/arrow/datatype_fixedwidth.go b/go/arrow/datatype_fixedwidth.go index bf64299a6f20e..5589ad637a2f5 100644 --- a/go/arrow/datatype_fixedwidth.go +++ b/go/arrow/datatype_fixedwidth.go @@ -529,6 +529,26 @@ func (Decimal128Type) Layout() DataTypeLayout { return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Decimal128SizeBytes)}} } +// Decimal256Type represents a fixed-size 256-bit decimal type. +type Decimal256Type struct { + Precision int32 + Scale int32 +} + +func (*Decimal256Type) ID() Type { return DECIMAL256 } +func (*Decimal256Type) Name() string { return "decimal256" } +func (*Decimal256Type) BitWidth() int { return 256 } +func (t *Decimal256Type) String() string { + return fmt.Sprintf("%s(%d, %d)", t.Name(), t.Precision, t.Scale) +} +func (t *Decimal256Type) Fingerprint() string { + return fmt.Sprintf("%s[%d,%d,%d]", typeFingerprint(t), t.BitWidth(), t.Precision, t.Scale) +} + +func (Decimal256Type) Layout() DataTypeLayout { + return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Decimal256SizeBytes)}} +} + // MonthInterval represents a number of months. 
type MonthInterval int32 diff --git a/go/arrow/datatype_fixedwidth_test.go b/go/arrow/datatype_fixedwidth_test.go index 9273836c2c513..dcd44540ee404 100644 --- a/go/arrow/datatype_fixedwidth_test.go +++ b/go/arrow/datatype_fixedwidth_test.go @@ -20,7 +20,7 @@ import ( "testing" "time" - "github.com/apache/arrow/go/v9/arrow" + "github.com/apache/arrow/go/v10/arrow" "github.com/stretchr/testify/assert" ) @@ -69,6 +69,33 @@ func TestDecimal128Type(t *testing.T) { } } +func TestDecimal256Type(t *testing.T) { + for _, tc := range []struct { + precision int32 + scale int32 + want string + }{ + {1, 10, "decimal256(1, 10)"}, + {10, 10, "decimal256(10, 10)"}, + {10, 1, "decimal256(10, 1)"}, + } { + t.Run(tc.want, func(t *testing.T) { + dt := arrow.Decimal256Type{Precision: tc.precision, Scale: tc.scale} + if got, want := dt.BitWidth(), 256; got != want { + t.Fatalf("invalid bitwidth: got=%d, want=%d", got, want) + } + + if got, want := dt.ID(), arrow.DECIMAL256; got != want { + t.Fatalf("invalid type ID: got=%v, want=%v", got, want) + } + + if got, want := dt.String(), tc.want; got != want { + t.Fatalf("invalid stringer: got=%q, want=%q", got, want) + } + }) + } +} + func TestFixedSizeBinaryType(t *testing.T) { for _, tc := range []struct { byteWidth int diff --git a/go/arrow/datatype_nested.go b/go/arrow/datatype_nested.go index 108ef82779e54..94f422d0507dd 100644 --- a/go/arrow/datatype_nested.go +++ b/go/arrow/datatype_nested.go @@ -17,8 +17,12 @@ package arrow import ( + "errors" "fmt" + "strconv" "strings" + + "github.com/apache/arrow/go/v10/arrow/internal/debug" ) type NestedType interface { @@ -94,6 +98,59 @@ func (ListType) Layout() DataTypeLayout { return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Int32SizeBytes)}} } +func (ListType) OffsetTypeTraits() OffsetTraits { return Int32Traits } + +type LargeListType struct { + ListType +} + +func (LargeListType) ID() Type { return LARGE_LIST } +func (LargeListType) Name() string { return "large_list" } +func (t *LargeListType) String() string { + return "large_" + t.ListType.String() +} + +func (t *LargeListType) Fingerprint() string { + child := t.elem.Type.Fingerprint() + if len(child) > 0 { + return typeFingerprint(t) + "{" + child + "}" + } + return "" +} + +func (LargeListType) Layout() DataTypeLayout { + return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Int64SizeBytes)}} +} + +func (LargeListType) OffsetTypeTraits() OffsetTraits { return Int64Traits } + +func LargeListOfField(f Field) *LargeListType { + if f.Type == nil { + panic("arrow: nil type for list field") + } + return &LargeListType{ListType{elem: f}} +} + +// ListOf returns the list type with element type t. +// For example, if t represents int32, ListOf(t) represents []int32. +// +// ListOf panics if t is nil or invalid. NullableElem defaults to true +func LargeListOf(t DataType) *LargeListType { + if t == nil { + panic("arrow: nil DataType") + } + return &LargeListType{ListType{elem: Field{Name: "item", Type: t, Nullable: true}}} +} + +// ListOfNonNullable is like ListOf but NullableElem defaults to false, indicating +// that the child type should be marked as non-nullable. +func LargeListOfNonNullable(t DataType) *LargeListType { + if t == nil { + panic("arrow: nil DataType") + } + return &LargeListType{ListType{elem: Field{Name: "item", Type: t, Nullable: false}}} +} + // FixedSizeListType describes a nested type in which each array slot contains // a fixed-size sequence of values, all having the same relative type. 
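The LargeList helpers mirror ListOf but carry 64-bit offsets; a small sketch using the constructors and the OffsetTypeTraits accessor added above (the values in the comments are what the definitions above imply):

```go
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
)

func main() {
	ll := arrow.LargeListOf(arrow.PrimitiveTypes.Int64)

	fmt.Println(ll.ID() == arrow.LARGE_LIST) // true
	fmt.Println(ll.Name())                   // large_list
	fmt.Println(ll.Elem())                   // int64

	// Large lists use 64-bit offsets, so an offsets buffer for a list
	// array with n slots needs room for n+1 int64 entries.
	fmt.Println(ll.OffsetTypeTraits().BytesRequired(4)) // 32
}
```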
type FixedSizeListType struct { @@ -217,7 +274,7 @@ func (*StructType) ID() Type { return STRUCT } func (*StructType) Name() string { return "struct" } func (t *StructType) String() string { - o := new(strings.Builder) + var o strings.Builder o.WriteString("struct<") for i, f := range t.fields { if i > 0 { @@ -323,12 +380,294 @@ func (t *MapType) Fingerprint() string { return fingerprint + "{" + keyFingerprint + itemFingerprint + "}" } -func (t *MapType) Fields() []Field { return t.ValueType().Fields() } +func (t *MapType) Fields() []Field { return []Field{t.ValueField()} } func (t *MapType) Layout() DataTypeLayout { return t.value.Layout() } +func (MapType) OffsetTypeTraits() OffsetTraits { return Int32Traits } + +type ( + // UnionTypeCode is an alias to int8 which is the type of the ids + // used for union arrays. + UnionTypeCode = int8 + UnionMode int8 +) + +const ( + MaxUnionTypeCode UnionTypeCode = 127 + InvalidUnionChildID int = -1 + + SparseMode UnionMode = iota // SPARSE + DenseMode // DENSE +) + +// UnionType is an interface to encompass both Dense and Sparse Union types. +// +// A UnionType is a nested type where each logical value is taken +// from a single child. A buffer of 8-bit type ids (typed as UnionTypeCode) +// indicates which child a given logical value is to be taken from. This is +// represented as the "child id" or "child index", which is the index into the +// list of child fields for a given child. +type UnionType interface { + NestedType + // Mode returns either SparseMode or DenseMode depending on the current + // concrete data type. + Mode() UnionMode + // ChildIDs returns a slice of ints to map UnionTypeCode values to + // the index in the Fields that represents the given Type. It is + // initialized with all values being InvalidUnionChildID (-1) + // before being populated based on the TypeCodes and fields of the type. + // The field for a given type can be retrieved by Fields()[ChildIDs()[typeCode]] + ChildIDs() []int + // TypeCodes returns the list of available type codes for this union type + // which will correspond to indexes into the ChildIDs slice to locate the + // appropriate child. A union Array contains a buffer of these type codes + // which indicate for a given index, which child has the value for that index. + TypeCodes() []UnionTypeCode + // MaxTypeCode returns the value of the largest TypeCode in the list of typecodes + // that are defined by this Union type + MaxTypeCode() UnionTypeCode +} + +// UnionOf returns an appropriate union type for the given Mode (Sparse or Dense), +// child fields, and type codes. len(fields) == len(typeCodes) must be true, or else +// this will panic. len(fields) can be 0. 
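Because ChildIDs is the inverse mapping of TypeCodes, a short sketch may help; it uses the UnionOf constructor defined just below, and the non-contiguous type codes 2 and 7 are arbitrary, chosen to show that codes need not start at zero:

```go
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
)

func main() {
	fields := []arrow.Field{
		{Name: "i", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
		{Name: "s", Type: arrow.BinaryTypes.String, Nullable: true},
	}
	// ChildIDs maps a type code back to the index of its field; codes
	// that are unused stay at InvalidUnionChildID (-1).
	u := arrow.UnionOf(arrow.DenseMode, fields, []arrow.UnionTypeCode{2, 7})

	fmt.Println(u.Mode() == arrow.DenseMode)      // true
	fmt.Println(u.TypeCodes())                    // [2 7]
	fmt.Println(u.ChildIDs()[2], u.ChildIDs()[7]) // 0 1
	fmt.Println(u.MaxTypeCode())                  // 7
}
```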
+func UnionOf(mode UnionMode, fields []Field, typeCodes []UnionTypeCode) UnionType { + switch mode { + case SparseMode: + return SparseUnionOf(fields, typeCodes) + case DenseMode: + return DenseUnionOf(fields, typeCodes) + default: + panic("arrow: invalid union mode") + } +} + +type unionType struct { + children []Field + typeCodes []UnionTypeCode + childIDs [int(MaxUnionTypeCode) + 1]int +} + +func (t *unionType) init(fields []Field, typeCodes []UnionTypeCode) { + // initialize all child IDs to -1 + t.childIDs[0] = InvalidUnionChildID + for i := 1; i < len(t.childIDs); i *= 2 { + copy(t.childIDs[i:], t.childIDs[:i]) + } + + t.children = fields + t.typeCodes = typeCodes + + for i, tc := range t.typeCodes { + t.childIDs[tc] = i + } +} + +func (t unionType) Fields() []Field { return t.children } +func (t unionType) TypeCodes() []UnionTypeCode { return t.typeCodes } +func (t unionType) ChildIDs() []int { return t.childIDs[:] } + +func (t *unionType) validate(fields []Field, typeCodes []UnionTypeCode, _ UnionMode) error { + if len(fields) != len(typeCodes) { + return errors.New("arrow: union types should have the same number of fields as type codes") + } + + for _, c := range typeCodes { + if c < 0 || c > MaxUnionTypeCode { + return errors.New("arrow: union type code out of bounds") + } + } + return nil +} + +func (t *unionType) MaxTypeCode() (max UnionTypeCode) { + if len(t.typeCodes) == 0 { + return + } + + max = t.typeCodes[0] + for _, c := range t.typeCodes[1:] { + if c > max { + max = c + } + } + return +} + +func (t *unionType) String() string { + var b strings.Builder + b.WriteByte('<') + for i := range t.typeCodes { + if i != 0 { + b.WriteString(", ") + } + fmt.Fprintf(&b, "%s=%d", t.children[i], t.typeCodes[i]) + } + b.WriteByte('>') + return b.String() +} + +func (t *unionType) fingerprint() string { + var b strings.Builder + for _, c := range t.typeCodes { + fmt.Fprintf(&b, ":%d", c) + } + b.WriteString("]{") + for _, c := range t.children { + fingerprint := c.Fingerprint() + if len(fingerprint) == 0 { + return "" + } + b.WriteString(fingerprint) + b.WriteByte(';') + } + b.WriteByte('}') + return b.String() +} + +func fieldsFromArrays(arrays []Array, names ...string) (ret []Field) { + ret = make([]Field, len(arrays)) + if len(names) == 0 { + for i, c := range arrays { + ret[i] = Field{Name: strconv.Itoa(i), Type: c.DataType(), Nullable: true} + } + } else { + debug.Assert(len(names) == len(arrays), "mismatch of arrays and names") + for i, c := range arrays { + ret[i] = Field{Name: names[i], Type: c.DataType(), Nullable: true} + } + } + return +} + +// SparseUnionType is the concrete type for Sparse union data. +// +// A sparse union is a nested type where each logical value is taken +// from a single child. A buffer of 8-bit type ids indicates which child +// a given logical value is to be taken from. +// +// In a sparse union, each child array will have the same length as the +// union array itself, regardless of the actual number of union values which +// refer to it. +// +// Unlike most other types, unions do not have a top-level validity bitmap. +type SparseUnionType struct { + unionType +} + +// SparseUnionFromArrays enables creating a union type from a list of Arrays, +// field names, and type codes. len(fields) should be either 0 or equal to len(children). +// len(codes) should also be either 0, or equal to len(children). +// +// If len(fields) == 0, then the fields will be named numerically as "0", "1", "2"... +// and so on. 
If len(codes) == 0, then the type codes will be constructed as +// [0, 1, 2, ..., n]. +func SparseUnionFromArrays(children []Array, fields []string, codes []UnionTypeCode) *SparseUnionType { + if len(codes) == 0 { + codes = make([]UnionTypeCode, len(children)) + for i := range children { + codes[i] = UnionTypeCode(i) + } + } + return SparseUnionOf(fieldsFromArrays(children, fields...), codes) +} + +// SparseUnionOf is equivalent to UnionOf(arrow.SparseMode, fields, typeCodes), +// constructing a SparseUnionType from a list of fields and type codes. +// +// If len(fields) != len(typeCodes) this will panic. They are allowed to be +// of length 0. +func SparseUnionOf(fields []Field, typeCodes []UnionTypeCode) *SparseUnionType { + ret := &SparseUnionType{} + if err := ret.validate(fields, typeCodes, ret.Mode()); err != nil { + panic(err) + } + ret.init(fields, typeCodes) + return ret +} + +func (SparseUnionType) ID() Type { return SPARSE_UNION } +func (SparseUnionType) Name() string { return "sparse_union" } +func (SparseUnionType) Mode() UnionMode { return SparseMode } +func (t *SparseUnionType) Fingerprint() string { + return typeFingerprint(t) + "[s" + t.fingerprint() +} +func (SparseUnionType) Layout() DataTypeLayout { + return DataTypeLayout{Buffers: []BufferSpec{SpecAlwaysNull(), SpecFixedWidth(Uint8SizeBytes)}} +} +func (t *SparseUnionType) String() string { + return t.Name() + t.unionType.String() +} + +// DenseUnionType is the concrete type for dense union data. +// +// A dense union is a nested type where each logical value is taken from a +// single child, at a specific offset. A buffer of 8-bit type ids (typed +// as UnionTypeCode) indicates which child a given logical value is to be +// taken from and a buffer of 32-bit offsets indicating which physical position +// in the given child array has the logical value for that index. +// +// Unlike a sparse union, a dense union allows encoding only the child values +// which are actually referred to by the union array. This is counterbalanced +// by the additional footprint of the offsets buffer, and the additional +// indirection cost when looking up values. +// +// Unlike most other types, unions don't have a top-level validity bitmap. +type DenseUnionType struct { + unionType +} + +// DenseUnionFromArrays enables creating a union type from a list of Arrays, +// field names, and type codes. len(fields) should be either 0 or equal to len(children). +// len(codes) should also be either 0, or equal to len(children). +// +// If len(fields) == 0, then the fields will be named numerically as "0", "1", "2"... +// and so on. If len(codes) == 0, then the type codes will be constructed as +// [0, 1, 2, ..., n]. +func DenseUnionFromArrays(children []Array, fields []string, codes []UnionTypeCode) *DenseUnionType { + if len(codes) == 0 { + codes = make([]UnionTypeCode, len(children)) + for i := range children { + codes[i] = UnionTypeCode(i) + } + } + return DenseUnionOf(fieldsFromArrays(children, fields...), codes) +} + +// DenseUnionOf is equivalent to UnionOf(arrow.DenseMode, fields, typeCodes), +// constructing a DenseUnionType from a list of fields and type codes. +// +// If len(fields) != len(typeCodes) this will panic. They are allowed to be +// of length 0.
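SparseUnionFromArrays (like its dense counterpart) falls back to numeric field names and sequential type codes when none are supplied; a hedged sketch of that default behavior (the array contents are arbitrary):

```go
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func main() {
	mem := memory.DefaultAllocator

	ib := array.NewInt32Builder(mem)
	defer ib.Release()
	ib.AppendValues([]int32{1, 2, 3}, nil)
	ints := ib.NewArray()
	defer ints.Release()

	sb := array.NewStringBuilder(mem)
	defer sb.Release()
	sb.AppendValues([]string{"a", "b", "c"}, nil)
	strs := sb.NewArray()
	defer strs.Release()

	// No field names and no type codes: fields are named "0", "1", ...
	// and the codes default to 0, 1, ...
	ut := arrow.SparseUnionFromArrays([]arrow.Array{ints, strs}, nil, nil)

	fmt.Println(ut.TypeCodes())                           // [0 1]
	fmt.Println(ut.Fields()[0].Name, ut.Fields()[1].Name) // 0 1
	fmt.Println(ut.Fields()[1].Type)                      // utf8
}
```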
+func DenseUnionOf(fields []Field, typeCodes []UnionTypeCode) *DenseUnionType { + ret := &DenseUnionType{} + if err := ret.validate(fields, typeCodes, ret.Mode()); err != nil { + panic(err) + } + ret.init(fields, typeCodes) + return ret +} + +func (DenseUnionType) ID() Type { return DENSE_UNION } +func (DenseUnionType) Name() string { return "dense_union" } +func (DenseUnionType) Mode() UnionMode { return DenseMode } +func (t *DenseUnionType) Fingerprint() string { + return typeFingerprint(t) + "[s" + t.fingerprint() +} + +func (DenseUnionType) Layout() DataTypeLayout { + return DataTypeLayout{Buffers: []BufferSpec{SpecAlwaysNull(), SpecFixedWidth(Uint8SizeBytes), SpecFixedWidth(Int32SizeBytes)}} +} + +func (DenseUnionType) OffsetTypeTraits() OffsetTraits { return Int32Traits } + +func (t *DenseUnionType) String() string { + return t.Name() + t.unionType.String() +} + type Field struct { Name string // Field name Type DataType // The field's data type @@ -373,14 +712,14 @@ func (f Field) Equal(o Field) bool { } func (f Field) String() string { - o := new(strings.Builder) + var o strings.Builder nullable := "" if f.Nullable { nullable = ", nullable" } - fmt.Fprintf(o, "%s: type=%v%v", f.Name, f.Type, nullable) + fmt.Fprintf(&o, "%s: type=%v%v", f.Name, f.Type, nullable) if f.HasMetadata() { - fmt.Fprintf(o, "\n%*.smetadata: %v", len(f.Name)+2, "", f.Metadata) + fmt.Fprintf(&o, "\n%*.smetadata: %v", len(f.Name)+2, "", f.Metadata) } return o.String() } diff --git a/go/arrow/datatype_null_test.go b/go/arrow/datatype_null_test.go index 4bcb690d76321..5b07d50b6c4a7 100644 --- a/go/arrow/datatype_null_test.go +++ b/go/arrow/datatype_null_test.go @@ -19,7 +19,7 @@ package arrow_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow" + "github.com/apache/arrow/go/v10/arrow" ) func TestNullType(t *testing.T) { diff --git a/go/arrow/decimal128/decimal128.go b/go/arrow/decimal128/decimal128.go index 14ad42ea5dc1b..fa044f46ec58f 100644 --- a/go/arrow/decimal128/decimal128.go +++ b/go/arrow/decimal128/decimal128.go @@ -22,7 +22,7 @@ import ( "math" "math/big" - "github.com/apache/arrow/go/v9/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/internal/debug" ) var ( @@ -67,7 +67,7 @@ func FromI64(v int64) Num { // BitLen > 128, this will panic. func FromBigInt(v *big.Int) (n Num) { bitlen := v.BitLen() - if bitlen > 128 { + if bitlen > 127 { panic("arrow/decimal128: cannot represent value larger than 128bits") } else if bitlen == 0 { // if bitlen is 0, then the value is 0 so return the default zeroed @@ -101,26 +101,6 @@ func (n Num) Negate() Num { return n } -func fromPositiveFloat32(v float32, prec, scale int32) (Num, error) { - var pscale float32 - if scale >= -38 && scale <= 38 { - pscale = float32PowersOfTen[scale+38] - } else { - pscale = float32(math.Pow10(int(scale))) - } - - v *= pscale - v = float32(math.RoundToEven(float64(v))) - maxabs := float32PowersOfTen[prec+38] - if v <= -maxabs || v >= maxabs { - return Num{}, fmt.Errorf("cannot convert %f to decimal128(precision=%d, scale=%d): overflow", v, prec, scale) - } - - hi := float32(math.Floor(math.Ldexp(float64(v), -64))) - low := v - float32(math.Ldexp(float64(hi), 64)) - return Num{hi: int64(hi), lo: uint64(low)}, nil -} - func fromPositiveFloat64(v float64, prec, scale int32) (Num, error) { var pscale float64 if scale >= -38 && scale <= 38 { @@ -145,14 +125,7 @@ func fromPositiveFloat64(v float64, prec, scale int32) (Num, error) { // value using the provided precision and scale. 
Will return an error if the // value cannot be accurately represented with the desired precision and scale. func FromFloat32(v float32, prec, scale int32) (Num, error) { - if v < 0 { - dec, err := fromPositiveFloat32(-v, prec, scale) - if err != nil { - return dec, err - } - return dec.Negate(), nil - } - return fromPositiveFloat32(v, prec, scale) + return FromFloat64(float64(v), prec, scale) } // FromFloat64 returns a new decimal128.Num constructed from the given float64 @@ -169,25 +142,10 @@ func FromFloat64(v float64, prec, scale int32) (Num, error) { return fromPositiveFloat64(v, prec, scale) } -func (n Num) tofloat32Positive(scale int32) float32 { - const twoTo64 float32 = 1.8446744e+19 - x := float32(n.hi) * twoTo64 - x += float32(n.lo) - if scale >= -38 && scale <= 38 { - x *= float32PowersOfTen[-scale+38] - } else { - x *= float32(math.Pow10(-int(scale))) - } - return x -} - // ToFloat32 returns a float32 value representative of this decimal128.Num, // but with the given scale. func (n Num) ToFloat32(scale int32) float32 { - if n.hi < 0 { - return -n.Negate().tofloat32Positive(scale) - } - return n.tofloat32Positive(scale) + return float32(n.ToFloat64(scale)) } func (n Num) tofloat64Positive(scale int32) float64 { @@ -195,11 +153,10 @@ func (n Num) tofloat64Positive(scale int32) float64 { x := float64(n.hi) * twoTo64 x += float64(n.lo) if scale >= -38 && scale <= 38 { - x *= float64PowersOfTen[-scale+38] - } else { - x *= math.Pow10(-int(scale)) + return x * float64PowersOfTen[-scale+38] } - return x + + return x * math.Pow10(-int(scale)) } // ToFloat64 returns a float64 value representative of this decimal128.Num, @@ -431,17 +388,6 @@ var ( New(2710505431213761085, 343699775700336640), } - float32PowersOfTen = [...]float32{ - 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, - 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, - 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, - 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, - 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, - 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, - 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31, - 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, - } - float64PowersOfTen = [...]float64{ 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, diff --git a/go/arrow/decimal128/decimal128_test.go b/go/arrow/decimal128/decimal128_test.go index e986e95fe838c..ed2a180715eb8 100644 --- a/go/arrow/decimal128/decimal128_test.go +++ b/go/arrow/decimal128/decimal128_test.go @@ -22,7 +22,7 @@ import ( "math/big" "testing" - "github.com/apache/arrow/go/v9/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/decimal128" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/decimal256/decimal256.go b/go/arrow/decimal256/decimal256.go new file mode 100644 index 0000000000000..3dbaa56024f4d --- /dev/null +++ b/go/arrow/decimal256/decimal256.go @@ -0,0 +1,476 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package decimal256 + +import ( + "errors" + "fmt" + "math" + "math/big" + + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/internal/debug" +) + +const ( + MaxPrecision = 76 + MaxScale = 76 +) + +type Num struct { + // arr[0] is the lowest bits, arr[3] is the highest bits + arr [4]uint64 +} + +// New returns a new signed 256-bit integer value where x1 contains +// the highest bits with the rest of the values in order down to the +// lowest bits +// +// ie: New(1, 2, 3, 4) returns with the elements in little-endian order +// {4, 3, 2, 1} but each value is still represented as the native endianness +func New(x1, x2, x3, x4 uint64) Num { + return Num{[4]uint64{x4, x3, x2, x1}} +} + +func (n Num) Array() [4]uint64 { return n.arr } + +func (n Num) LowBits() uint64 { return n.arr[0] } + +func FromDecimal128(n decimal128.Num) Num { + var topBits uint64 + if n.Sign() < 0 { + topBits = math.MaxUint64 + } + return New(topBits, topBits, uint64(n.HighBits()), n.LowBits()) +} + +func FromU64(v uint64) Num { + return Num{[4]uint64{v, 0, 0, 0}} +} + +func FromI64(v int64) Num { + switch { + case v > 0: + return New(0, 0, 0, uint64(v)) + case v < 0: + return New(math.MaxUint64, math.MaxUint64, math.MaxUint64, uint64(v)) + default: + return Num{} + } +} + +func (n Num) Negate() Num { + var carry uint64 = 1 + for i := range n.arr { + n.arr[i] = ^n.arr[i] + carry + if n.arr[i] != 0 { + carry = 0 + } + } + return n +} + +func FromFloat32(v float32, prec, scale int32) (Num, error) { + return FromFloat64(float64(v), prec, scale) +} + +func FromFloat64(v float64, prec, scale int32) (Num, error) { + debug.Assert(prec > 0 && prec <= 76, "invalid precision for converting to decimal256") + + if math.IsInf(v, 0) { + return Num{}, fmt.Errorf("cannot convert %f to decimal256", v) + } + + if v < 0 { + dec, err := fromPositiveFloat64(-v, prec, scale) + if err != nil { + return dec, err + } + return dec.Negate(), nil + } + return fromPositiveFloat64(v, prec, scale) +} + +func fromPositiveFloat64(v float64, prec, scale int32) (Num, error) { + var pscale float64 + if scale >= -76 && scale <= 76 { + pscale = float64PowersOfTen[scale+76] + } else { + pscale = math.Pow10(int(scale)) + } + + v *= pscale + v = math.RoundToEven(v) + maxabs := float64PowersOfTen[prec+76] + if v <= -maxabs || v >= maxabs { + return Num{}, fmt.Errorf("cannot convert %f to decimal256(precision=%d, scale=%d): overflow", + v, prec, scale) + } + + var arr [4]float64 + arr[3] = math.Floor(math.Ldexp(v, -192)) + v -= math.Ldexp(arr[3], 192) + arr[2] = math.Floor(math.Ldexp(v, -128)) + v -= math.Ldexp(arr[2], 128) + arr[1] = math.Floor(math.Ldexp(v, -64)) + v -= math.Ldexp(arr[1], 64) + arr[0] = v + + debug.Assert(arr[3] >= 0, "bad conversion float64 to decimal256") + debug.Assert(arr[3] < 1.8446744073709552e+19, "bad conversion float64 to decimal256") // 2**64 + debug.Assert(arr[2] >= 0, "bad conversion float64 to decimal256") + debug.Assert(arr[2] < 1.8446744073709552e+19, "bad conversion float64 to decimal256") // 2**64 + debug.Assert(arr[1] >= 0, "bad conversion float64 to decimal256") + 
debug.Assert(arr[1] < 1.8446744073709552e+19, "bad conversion float64 to decimal256") // 2**64 + debug.Assert(arr[0] >= 0, "bad conversion float64 to decimal256") + debug.Assert(arr[0] < 1.8446744073709552e+19, "bad conversion float64 to decimal256") // 2**64 + return Num{[4]uint64{uint64(arr[0]), uint64(arr[1]), uint64(arr[2]), uint64(arr[3])}}, nil +} + +func (n Num) tofloat64Positive(scale int32) float64 { + const ( + twoTo64 float64 = 1.8446744073709552e+19 + twoTo128 float64 = 3.402823669209385e+38 + twoTo192 float64 = 6.277101735386681e+57 + ) + + x := float64(n.arr[3]) * twoTo192 + x += float64(n.arr[2]) * twoTo128 + x += float64(n.arr[1]) * twoTo64 + x += float64(n.arr[0]) + + if scale >= -76 && scale <= 76 { + return x * float64PowersOfTen[-scale+76] + } + + return x * math.Pow10(-int(scale)) +} + +func (n Num) ToFloat32(scale int32) float32 { return float32(n.ToFloat64(scale)) } + +func (n Num) ToFloat64(scale int32) float64 { + if n.Sign() < 0 { + return -n.Negate().tofloat64Positive(scale) + } + return n.tofloat64Positive(scale) +} + +func (n Num) Sign() int { + if n == (Num{}) { + return 0 + } + return int(1 | (int64(n.arr[3]) >> 63)) +} + +func FromBigInt(v *big.Int) (n Num) { + bitlen := v.BitLen() + if bitlen > 255 { + panic("arrow/decimal256: cannot represent value larger than 256bits") + } else if bitlen == 0 { + return + } + + b := v.Bits() + for i, bits := range b { + n.arr[i] = uint64(bits) + } + if v.Sign() < 0 { + return n.Negate() + } + return +} + +func toBigIntPositive(n Num) *big.Int { + return new(big.Int).SetBits([]big.Word{big.Word(n.arr[0]), big.Word(n.arr[1]), big.Word(n.arr[2]), big.Word(n.arr[3])}) +} + +func (n Num) BigInt() *big.Int { + if n.Sign() < 0 { + b := toBigIntPositive(n.Negate()) + return b.Neg(b) + } + return toBigIntPositive(n) +} + +func (n Num) Less(other Num) bool { + switch { + case n.arr[3] != other.arr[3]: + return n.arr[3] < other.arr[3] + case n.arr[2] != other.arr[2]: + return n.arr[2] < other.arr[2] + case n.arr[1] != other.arr[1]: + return n.arr[1] < other.arr[1] + } + return n.arr[0] < other.arr[0] +} + +func (n Num) IncreaseScaleBy(increase int32) Num { + debug.Assert(increase >= 0, "invalid amount to increase scale by") + debug.Assert(increase <= 76, "invalid amount to increase scale by") + + v := scaleMultipliers[increase].BigInt() + return FromBigInt(v.Mul(n.BigInt(), v)) +} + +func (n Num) ReduceScaleBy(reduce int32, round bool) Num { + debug.Assert(reduce >= 0, "invalid amount to reduce scale by") + debug.Assert(reduce <= 76, "invalid amount to reduce scale by") + + if reduce == 0 { + return n + } + + divisor := scaleMultipliers[reduce].BigInt() + result, remainder := divisor.QuoRem(n.BigInt(), divisor, new(big.Int)) + if round { + divisorHalf := scaleMultipliersHalf[reduce] + if remainder.Abs(remainder).Cmp(divisorHalf.BigInt()) != -1 { + result.Add(result, big.NewInt(int64(n.Sign()))) + } + } + return FromBigInt(result) +} + +func (n Num) rescaleWouldCauseDataLoss(deltaScale int32, multiplier Num) (out Num, loss bool) { + var ( + value, result, remainder *big.Int + ) + value = n.BigInt() + if deltaScale < 0 { + result, remainder = new(big.Int).QuoRem(value, multiplier.BigInt(), new(big.Int)) + return FromBigInt(result), remainder.Cmp(big.NewInt(0)) != 0 + } + + result = (&big.Int{}).Mul(value, multiplier.BigInt()) + out = FromBigInt(result) + cmp := result.Cmp(value) + if n.Sign() < 0 { + loss = cmp == 1 + } else { + loss = cmp == -1 + } + return +} + +func (n Num) Rescale(original, newscale int32) (out Num, err error) { + 
if original == newscale { + return n, nil + } + + deltaScale := newscale - original + absDeltaScale := int32(math.Abs(float64(deltaScale))) + + multiplier := scaleMultipliers[absDeltaScale] + var wouldHaveLoss bool + out, wouldHaveLoss = n.rescaleWouldCauseDataLoss(deltaScale, multiplier) + if wouldHaveLoss { + err = errors.New("rescale data loss") + } + return +} + +func (n Num) Abs() Num { + switch n.Sign() { + case -1: + return n.Negate() + } + return n +} + +func (n Num) FitsInPrecision(prec int32) bool { + debug.Assert(prec > 0, "precision must be > 0") + debug.Assert(prec <= 76, "precision must be <= 76") + return n.Abs().Less(scaleMultipliers[prec]) +} + +var ( + scaleMultipliers = [...]Num{ + FromU64(1), + FromU64(10), + FromU64(100), + FromU64(1000), + FromU64(10000), + FromU64(100000), + FromU64(1000000), + FromU64(10000000), + FromU64(100000000), + FromU64(1000000000), + FromU64(10000000000), + FromU64(100000000000), + FromU64(1000000000000), + FromU64(10000000000000), + FromU64(100000000000000), + FromU64(1000000000000000), + FromU64(10000000000000000), + FromU64(100000000000000000), + FromU64(1000000000000000000), + New(0, 0, 0, 10000000000000000000), + New(0, 0, 5, 7766279631452241920), + New(0, 0, 54, 3875820019684212736), + New(0, 0, 542, 1864712049423024128), + New(0, 0, 5421, 200376420520689664), + New(0, 0, 54210, 2003764205206896640), + New(0, 0, 542101, 1590897978359414784), + New(0, 0, 5421010, 15908979783594147840), + New(0, 0, 54210108, 11515845246265065472), + New(0, 0, 542101086, 4477988020393345024), + New(0, 0, 5421010862, 7886392056514347008), + New(0, 0, 54210108624, 5076944270305263616), + New(0, 0, 542101086242, 13875954555633532928), + New(0, 0, 5421010862427, 9632337040368467968), + New(0, 0, 54210108624275, 4089650035136921600), + New(0, 0, 542101086242752, 4003012203950112768), + New(0, 0, 5421010862427522, 3136633892082024448), + New(0, 0, 54210108624275221, 12919594847110692864), + New(0, 0, 542101086242752217, 68739955140067328), + New(0, 0, 5421010862427522170, 687399551400673280), + New(0, 2, 17316620476856118468, 6873995514006732800), + New(0, 29, 7145508105175220139, 13399722918938673152), + New(0, 293, 16114848830623546549, 4870020673419870208), + New(0, 2938, 13574535716559052564, 11806718586779598848), + New(0, 29387, 6618148649623664334, 7386721425538678784), + New(0, 293873, 10841254275107988496, 80237960548581376), + New(0, 2938735, 16178822382532126880, 802379605485813760), + New(0, 29387358, 14214271235644855872, 8023796054858137600), + New(0, 293873587, 13015503840481697412, 6450984253743169536), + New(0, 2938735877, 1027829888850112811, 9169610316303040512), + New(0, 29387358770, 10278298888501128114, 17909126868192198656), + New(0, 293873587705, 10549268516463523069, 13070572018536022016), + New(0, 2938735877055, 13258964796087472617, 1578511669393358848), + New(0, 29387358770557, 3462439444907864858, 15785116693933588480), + New(0, 293873587705571, 16177650375369096972, 10277214349659471872), + New(0, 2938735877055718, 14202551164014556797, 10538423128046960640), + New(0, 29387358770557187, 12898303124178706663, 13150510911921848320), + New(0, 293873587705571876, 18302566799529756941, 2377900603251621888), + New(0, 2938735877055718769, 17004971331911604867, 5332261958806667264), + New(1, 10940614696847636083, 4029016655730084128, 16429131440647569408), + New(15, 17172426599928602752, 3396678409881738056, 16717361816799281152), + New(159, 5703569335900062977, 15520040025107828953, 1152921504606846976), + New(1593, 1695461137871974930, 
7626447661401876602, 11529215046068469760), + New(15930, 16954611378719749304, 2477500319180559562, 4611686018427387904), + New(159309, 3525417123811528497, 6328259118096044006, 9223372036854775808), + New(1593091, 16807427164405733357, 7942358959831785217, 0), + New(15930919, 2053574980671369030, 5636613303479645706, 0), + New(159309191, 2089005733004138687, 1025900813667802212, 0), + New(1593091911, 2443313256331835254, 10259008136678022120, 0), + New(15930919111, 5986388489608800929, 10356360998232463120, 0), + New(159309191113, 4523652674959354447, 11329889613776873120, 0), + New(1593091911132, 8343038602174441244, 2618431695511421504, 0), + New(15930919111324, 9643409726906205977, 7737572881404663424, 0), + New(159309191113245, 4200376900514301694, 3588752519208427776, 0), + New(1593091911132452, 5110280857723913709, 17440781118374726144, 0), + New(15930919111324522, 14209320429820033867, 8387114520361296896, 0), + New(159309191113245227, 12965995782233477362, 10084168908774762496, 0), + New(1593091911132452277, 532749306367912313, 8607968719199866880, 0), + } + + scaleMultipliersHalf = [...]Num{ + FromU64(0), + FromU64(5), + FromU64(50), + FromU64(500), + FromU64(5000), + FromU64(50000), + FromU64(500000), + FromU64(5000000), + FromU64(50000000), + FromU64(500000000), + FromU64(5000000000), + FromU64(50000000000), + FromU64(500000000000), + FromU64(5000000000000), + FromU64(50000000000000), + FromU64(500000000000000), + FromU64(5000000000000000), + FromU64(50000000000000000), + FromU64(500000000000000000), + FromU64(5000000000000000000), + New(0, 0, 2, 13106511852580896768), + New(0, 0, 27, 1937910009842106368), + New(0, 0, 271, 932356024711512064), + New(0, 0, 2710, 9323560247115120640), + New(0, 0, 27105, 1001882102603448320), + New(0, 0, 271050, 10018821026034483200), + New(0, 0, 2710505, 7954489891797073920), + New(0, 0, 27105054, 5757922623132532736), + New(0, 0, 271050543, 2238994010196672512), + New(0, 0, 2710505431, 3943196028257173504), + New(0, 0, 27105054312, 2538472135152631808), + New(0, 0, 271050543121, 6937977277816766464), + New(0, 0, 2710505431213, 14039540557039009792), + New(0, 0, 27105054312137, 11268197054423236608), + New(0, 0, 271050543121376, 2001506101975056384), + New(0, 0, 2710505431213761, 1568316946041012224), + New(0, 0, 27105054312137610, 15683169460410122240), + New(0, 0, 271050543121376108, 9257742014424809472), + New(0, 0, 2710505431213761085, 343699775700336640), + New(0, 1, 8658310238428059234, 3436997757003366400), + New(0, 14, 12796126089442385877, 15923233496324112384), + New(0, 146, 17280796452166549082, 11658382373564710912), + New(0, 1469, 6787267858279526282, 5903359293389799424), + New(0, 14693, 12532446361666607975, 3693360712769339392), + New(0, 146936, 14643999174408770056, 40118980274290688), + New(0, 1469367, 17312783228120839248, 401189802742906880), + New(0, 14693679, 7107135617822427936, 4011898027429068800), + New(0, 146936793, 15731123957095624514, 3225492126871584768), + New(0, 1469367938, 9737286981279832213, 13808177195006296064), + New(0, 14693679385, 5139149444250564057, 8954563434096099328), + New(0, 146936793852, 14498006295086537342, 15758658046122786816), + New(0, 1469367938527, 15852854434898512116, 10012627871551455232), + New(0, 14693679385278, 10954591759308708237, 7892558346966794240), + New(0, 146936793852785, 17312197224539324294, 5138607174829735936), + New(0, 1469367938527859, 7101275582007278398, 14492583600878256128), + New(0, 14693679385278593, 15672523598944129139, 15798627492815699968), + New(0, 
146936793852785938, 9151283399764878470, 10412322338480586752), + New(0, 1469367938527859384, 17725857702810578241, 11889503016258109440), + New(0, 14693679385278593849, 11237880364719817872, 8214565720323784704), + New(7, 17809585336819077184, 1698339204940869028, 8358680908399640576), + New(79, 12075156704804807296, 16983392049408690284, 9799832789158199296), + New(796, 10071102605790763273, 3813223830700938301, 5764607523034234880), + New(7965, 8477305689359874652, 1238750159590279781, 2305843009213693952), + New(79654, 10986080598760540056, 12387501595902797811, 4611686018427387904), + New(796545, 17627085619057642486, 13194551516770668416, 9223372036854775808), + New(7965459, 10250159527190460323, 2818306651739822853, 0), + New(79654595, 10267874903356845151, 9736322443688676914, 0), + New(796545955, 10445028665020693435, 5129504068339011060, 0), + New(7965459555, 12216566281659176272, 14401552535971007368, 0), + New(79654595556, 11485198374334453031, 14888316843743212368, 0), + New(796545955566, 4171519301087220622, 1309215847755710752, 0), + New(7965459555662, 4821704863453102988, 13092158477557107520, 0), + New(79654595556622, 11323560487111926655, 1794376259604213888, 0), + New(796545955566226, 2555140428861956854, 17943762596042138880, 0), + New(7965459555662261, 7104660214910016933, 13416929297035424256, 0), + New(79654595556622613, 15706369927971514489, 5042084454387381248, 0), + New(796545955566226138, 9489746690038731964, 13527356396454709248, 0), + } + + float64PowersOfTen = [...]float64{ + 1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65, + 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53, + 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, + 1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, + 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17, + 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, + 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, + 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, + 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31, + 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40, 1e41, 1e42, 1e43, + 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 1e50, 1e51, 1e52, 1e53, 1e54, 1e55, + 1e56, 1e57, 1e58, 1e59, 1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, + 1e68, 1e69, 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, + } +) diff --git a/go/arrow/decimal256/decimal256_test.go b/go/arrow/decimal256/decimal256_test.go new file mode 100644 index 0000000000000..719fb13e4eb31 --- /dev/null +++ b/go/arrow/decimal256/decimal256_test.go @@ -0,0 +1,225 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package decimal256_test + +import ( + "fmt" + "math" + "math/big" + "testing" + + "github.com/apache/arrow/go/v10/arrow/decimal256" + "github.com/stretchr/testify/assert" +) + +func TestFromU64(t *testing.T) { + for _, tc := range []struct { + v uint64 + want decimal256.Num + sign int + }{ + {0, decimal256.New(0, 0, 0, 0), 0}, + {1, decimal256.New(0, 0, 0, 1), +1}, + {2, decimal256.New(0, 0, 0, 2), +1}, + {math.MaxInt64, decimal256.New(0, 0, 0, math.MaxInt64), +1}, + {math.MaxUint64, decimal256.New(0, 0, 0, math.MaxUint64), +1}, + } { + t.Run(fmt.Sprintf("%+0#x", tc.v), func(t *testing.T) { + v := decimal256.FromU64(tc.v) + ref := new(big.Int).SetUint64(tc.v) + if got, want := v, tc.want; got != want { + t.Fatalf("invalid value. got=%+0#x, want=%+0#x (big-int=%+0#x)", got, want, ref) + } + if got, want := v.Sign(), tc.sign; got != want { + t.Fatalf("invalid sign for %+0#x: got=%v, want=%v", v, got, want) + } + if got, want := v.Sign(), ref.Sign(); got != want { + t.Fatalf("invalid sign for %+0#x: got=%v, want=%v", v, got, want) + } + if got, want := v.Array(), tc.want.Array(); got != want { + t.Fatalf("invalid array: got=%+0#v, want=%+0#v", got, want) + } + }) + } +} + +func u64Cnv(i int64) uint64 { return uint64(i) } + +func TestFromI64(t *testing.T) { + for _, tc := range []struct { + v int64 + want decimal256.Num + sign int + }{ + {0, decimal256.New(0, 0, 0, 0), 0}, + {1, decimal256.New(0, 0, 0, 1), 1}, + {2, decimal256.New(0, 0, 0, 2), 1}, + {math.MaxInt64, decimal256.New(0, 0, 0, math.MaxInt64), 1}, + {math.MinInt64, decimal256.New(math.MaxUint64, math.MaxUint64, math.MaxUint64, u64Cnv(math.MinInt64)), -1}, + } { + t.Run(fmt.Sprintf("%+0#x", tc.v), func(t *testing.T) { + v := decimal256.FromI64(tc.v) + ref := big.NewInt(tc.v) + if got, want := v, tc.want; got != want { + t.Fatalf("invalid value. 
got=%+0#x, want=%+0#x (big-int=%+0#x)", got, want, ref) + } + if got, want := v.Sign(), tc.sign; got != want { + t.Fatalf("invalid sign for %+0#x: got=%v, want=%v", v, got, want) + } + if got, want := v.Sign(), ref.Sign(); got != want { + t.Fatalf("invalid sign for %+0#x: got=%v, want=%v", v, got, want) + } + if got, want := v.Array(), tc.want.Array(); got != want { + t.Fatalf("invalid array: got=%+0#v, want=%+0#v", got, want) + } + }) + } +} + +func TestDecimalToBigInt(t *testing.T) { + tests := []struct { + arr [4]uint64 + exp string + }{ + {[4]uint64{0, 10084168908774762496, 12965995782233477362, 159309191113245227}, "1000000000000000000000000000000000000000000000000000000000000000000000000000"}, + {[4]uint64{0, 8362575164934789120, 5480748291476074253, 18287434882596306388}, "-1000000000000000000000000000000000000000000000000000000000000000000000000000"}, + {[4]uint64{0, 0, 0, 0}, "0"}, + {[4]uint64{17877984925544397504, 5352188884907840935, 234631617561833724, 196678011949953713}, "1234567890123456789012345678901234567890123456789012345678901234567890123456"}, + {[4]uint64{568759148165154112, 13094555188801710680, 18212112456147717891, 18250066061759597902}, "-1234567890123456789012345678901234567890123456789012345678901234567890123456"}, + } + for _, tc := range tests { + t.Run("", func(t *testing.T) { + n := decimal256.New(tc.arr[3], tc.arr[2], tc.arr[1], tc.arr[0]) + bi := n.BigInt() + + assert.Equal(t, tc.exp, bi.String()) + n2 := decimal256.FromBigInt(bi) + assert.Equal(t, n2.Array(), n.Array()) + }) + } +} + +func TestDecimalFromFloat(t *testing.T) { + tests := []struct { + val float64 + precision, scale int32 + expected string + }{ + {0, 1, 0, "0"}, + {math.Copysign(0, -1), 1, 0, "0"}, + {0, 19, 4, "0.0000"}, + {math.Copysign(0, -1), 19, 4, "0.0000"}, + {123.0, 7, 4, "123.0000"}, + {-123, 7, 4, "-123.0000"}, + {456.78, 7, 4, "456.7800"}, + {-456.78, 7, 4, "-456.7800"}, + {456.784, 5, 2, "456.78"}, + {-456.784, 5, 2, "-456.78"}, + {456.786, 5, 2, "456.79"}, + {-456.786, 5, 2, "-456.79"}, + {999.99, 5, 2, "999.99"}, + {-999.99, 5, 2, "-999.99"}, + {123, 19, 0, "123"}, + {-123, 19, 0, "-123"}, + {123.4, 19, 0, "123"}, + {-123.4, 19, 0, "-123"}, + {123.6, 19, 0, "124"}, + {-123.6, 19, 0, "-124"}, + // 2**62 + {4.611686018427387904e+18, 19, 0, "4611686018427387904"}, + {-4.611686018427387904e+18, 19, 0, "-4611686018427387904"}, + // 2**63 + {9.223372036854775808e+18, 19, 0, "9223372036854775808"}, + {-9.223372036854775808e+18, 19, 0, "-9223372036854775808"}, + // 2**64 + {1.8446744073709551616e+19, 20, 0, "18446744073709551616"}, + {-1.8446744073709551616e+19, 20, 0, "-18446744073709551616"}, + {9.999999999999999e+75, 76, 0, "9999999999999998863663300700064420349597509066704028242075715752105414230016"}, + {-9.999999999999999e+75, 76, 0, "-9999999999999998863663300700064420349597509066704028242075715752105414230016"}, + } + + t.Run("float64", func(t *testing.T) { + for _, tt := range tests { + t.Run(tt.expected, func(t *testing.T) { + n, err := decimal256.FromFloat64(tt.val, tt.precision, tt.scale) + assert.NoError(t, err) + + assert.Equal(t, tt.expected, big.NewFloat(n.ToFloat64(tt.scale)).Text('f', int(tt.scale))) + }) + } + + t.Run("large values", func(t *testing.T) { + // test entire float64 range + for scale := int32(-308); scale <= 308; scale++ { + val := math.Pow10(int(scale)) + n, err := decimal256.FromFloat64(val, 1, -scale) + assert.NoError(t, err) + assert.Equal(t, "1", n.BigInt().String()) + } + + for scale := int32(-307); scale <= 306; scale++ { + val := 123 * 
math.Pow10(int(scale)) + n, err := decimal256.FromFloat64(val, 2, -scale-1) + assert.NoError(t, err) + assert.Equal(t, "12", n.BigInt().String()) + n, err = decimal256.FromFloat64(val, 3, -scale) + assert.NoError(t, err) + assert.Equal(t, "123", n.BigInt().String()) + n, err = decimal256.FromFloat64(val, 4, -scale+1) + assert.NoError(t, err) + assert.Equal(t, "1230", n.BigInt().String()) + } + }) + }) + + t.Run("float32", func(t *testing.T) { + for _, tt := range tests { + if tt.precision > 38 { + continue + } + t.Run(tt.expected, func(t *testing.T) { + n, err := decimal256.FromFloat32(float32(tt.val), tt.precision, tt.scale) + assert.NoError(t, err) + + assert.Equal(t, tt.expected, big.NewFloat(float64(n.ToFloat32(tt.scale))).Text('f', int(tt.scale))) + }) + } + + t.Run("large values", func(t *testing.T) { + // test entire float32 range + for scale := int32(-38); scale <= 38; scale++ { + val := float32(math.Pow10(int(scale))) + n, err := decimal256.FromFloat32(val, 1, -scale) + assert.NoError(t, err) + assert.Equal(t, "1", n.BigInt().String()) + } + + for scale := int32(-37); scale <= 36; scale++ { + val := 123 * float32(math.Pow10(int(scale))) + n, err := decimal256.FromFloat32(val, 2, -scale-1) + assert.NoError(t, err) + assert.Equal(t, "12", n.BigInt().String()) + n, err = decimal256.FromFloat32(val, 3, -scale) + assert.NoError(t, err) + assert.Equal(t, "123", n.BigInt().String()) + n, err = decimal256.FromFloat32(val, 4, -scale+1) + assert.NoError(t, err) + assert.Equal(t, "1230", n.BigInt().String()) + } + }) + }) +} diff --git a/go/arrow/doc.go b/go/arrow/doc.go index bfa210da274ed..cf73f1a00b3f7 100644 --- a/go/arrow/doc.go +++ b/go/arrow/doc.go @@ -31,6 +31,8 @@ array is valid (not null). If the array has no null entries, it is possible to o */ package arrow +const PkgVersion = "10.0.0-SNAPSHOT" + //go:generate go run _tools/tmpl/main.go -i -data=numeric.tmpldata type_traits_numeric.gen.go.tmpl type_traits_numeric.gen_test.go.tmpl array/numeric.gen.go.tmpl array/numericbuilder.gen.go.tmpl array/bufferbuilder_numeric.gen.go.tmpl //go:generate go run _tools/tmpl/main.go -i -data=datatype_numeric.gen.go.tmpldata datatype_numeric.gen.go.tmpl tensor/numeric.gen.go.tmpl tensor/numeric.gen_test.go.tmpl //go:generate go run _tools/tmpl/main.go -i -data=scalar/numeric.gen.go.tmpldata scalar/numeric.gen.go.tmpl scalar/numeric.gen_test.go.tmpl @@ -38,3 +40,4 @@ package arrow // stringer //go:generate stringer -type=Type +//go:generate stringer -type=UnionMode -linecomment diff --git a/go/arrow/endian/big.go b/go/arrow/endian/big.go index ebd36539db052..0b92585745f42 100644 --- a/go/arrow/endian/big.go +++ b/go/arrow/endian/big.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build s390x // +build s390x package endian @@ -22,4 +23,8 @@ import "encoding/binary" var Native = binary.BigEndian -const IsBigEndian = true +const ( + IsBigEndian = true + NativeEndian = BigEndian + NonNativeEndian = LittleEndian +) diff --git a/go/arrow/endian/endian.go b/go/arrow/endian/endian.go new file mode 100644 index 0000000000000..37ee3b16725c9 --- /dev/null +++ b/go/arrow/endian/endian.go @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package endian + +import ( + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/internal/flatbuf" +) + +type Endianness flatbuf.Endianness + +const ( + LittleEndian Endianness = Endianness(flatbuf.EndiannessLittle) + BigEndian Endianness = Endianness(flatbuf.EndiannessBig) +) + +func (e Endianness) String() string { + switch e { + case LittleEndian: + return "little" + case BigEndian: + return "big" + default: + debug.Assert(false, "wtf? bad endianness value") + return "???" + } +} diff --git a/go/arrow/endian/little.go b/go/arrow/endian/little.go index d98b5c97a9d75..def1fc64b9e64 100644 --- a/go/arrow/endian/little.go +++ b/go/arrow/endian/little.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !s390x // +build !s390x package endian @@ -22,4 +23,8 @@ import "encoding/binary" var Native = binary.LittleEndian -var IsBigEndian = false +const ( + IsBigEndian = false + NativeEndian = LittleEndian + NonNativeEndian = BigEndian +) diff --git a/go/arrow/errors.go b/go/arrow/errors.go new file mode 100644 index 0000000000000..74214ae822036 --- /dev/null +++ b/go/arrow/errors.go @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package arrow + +import "errors" + +var ( + ErrInvalid = errors.New("invalid") + ErrNotImplemented = errors.New("not implemented") + ErrType = errors.New("type error") + ErrKey = errors.New("key error") +) diff --git a/go/arrow/example_test.go b/go/arrow/example_test.go index 5b54621a9fc0c..f95076738a7a6 100644 --- a/go/arrow/example_test.go +++ b/go/arrow/example_test.go @@ -20,10 +20,10 @@ import ( "fmt" "log" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/arrow/tensor" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/tensor" ) // This example demonstrates how to build an array of int64 values using a builder and Append. 
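Relating to the decimal256 helpers added earlier in this patch (FromFloat64, FitsInPrecision, BigInt), the following is a hedged, self-contained sketch of one round trip; the literal values and printed results are illustrative assumptions, not part of the change.

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow/decimal256"
)

func main() {
	// 456.786 stored at precision 5 / scale 2 rounds to 456.79,
	// i.e. an unscaled integer value of 45679.
	n, err := decimal256.FromFloat64(456.786, 5, 2)
	if err != nil {
		panic(err)
	}
	fmt.Println(n.BigInt())           // 45679
	fmt.Println(n.FitsInPrecision(5)) // true: 45679 < 10^5
	fmt.Println(n.FitsInPrecision(4)) // false: 45679 needs five digits
}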
@@ -647,3 +647,179 @@ func Example_mapArray() { // Map[2] = {ab => (null), cd => 2, ef => 5, gh => 1} // Map = [{["ab" "cd" "ef" "gh"] [1 2 3 4]} (null) {["ab" "cd" "ef" "gh"] [(null) 2 5 1]}] } + +func Example_sparseUnionArray() { + pool := memory.NewGoAllocator() + + sparseBuilder := array.NewEmptySparseUnionBuilder(pool) + defer sparseBuilder.Release() + + i8Builder := array.NewInt8Builder(pool) + defer i8Builder.Release() + i8Code := sparseBuilder.AppendChild(i8Builder, "i8") + + strBuilder := array.NewStringBuilder(pool) + defer strBuilder.Release() + strCode := sparseBuilder.AppendChild(strBuilder, "str") + + f64Builder := array.NewFloat64Builder(pool) + defer f64Builder.Release() + f64Code := sparseBuilder.AppendChild(f64Builder, "f64") + + values := []interface{}{int8(33), "abc", float64(1.0), float64(-1.0), nil, + "", int8(10), "def", int8(-10), float64(0.5)} + + for _, v := range values { + switch v := v.(type) { + case int8: + sparseBuilder.Append(i8Code) + i8Builder.Append(v) + strBuilder.AppendEmptyValue() + f64Builder.AppendEmptyValue() + case string: + sparseBuilder.Append(strCode) + i8Builder.AppendEmptyValue() + strBuilder.Append(v) + f64Builder.AppendEmptyValue() + case float64: + sparseBuilder.Append(f64Code) + i8Builder.AppendEmptyValue() + strBuilder.AppendEmptyValue() + f64Builder.Append(v) + case nil: + sparseBuilder.AppendNull() + } + } + + arr := sparseBuilder.NewSparseUnionArray() + defer arr.Release() + + fmt.Printf("Len() = %d\n", arr.Len()) + fields := arr.UnionType().Fields() + for i := 0; i < arr.Len(); i++ { + child := arr.ChildID(i) + data := arr.Field(child) + field := fields[child] + + if data.IsNull(i) { + fmt.Printf("[%d] = (null)\n", i) + continue + } + var v interface{} + switch varr := data.(type) { + case *array.Int8: + v = varr.Value(i) + case *array.String: + v = varr.Value(i) + case *array.Float64: + v = varr.Value(i) + } + fmt.Printf("[%d] = %#5v {%s}\n", i, v, field.Name) + } + + fmt.Printf("i8: %s\n", arr.Field(0)) + fmt.Printf("str: %s\n", arr.Field(1)) + fmt.Printf("f64: %s\n", arr.Field(2)) + + // Output: + // Len() = 10 + // [0] = 33 {i8} + // [1] = "abc" {str} + // [2] = 1 {f64} + // [3] = -1 {f64} + // [4] = (null) + // [5] = "" {str} + // [6] = 10 {i8} + // [7] = "def" {str} + // [8] = -10 {i8} + // [9] = 0.5 {f64} + // i8: [33 0 0 0 (null) 0 10 0 -10 0] + // str: ["" "abc" "" "" "" "" "" "def" "" ""] + // f64: [0 0 1 -1 0 0 0 0 0 0.5] +} + +func Example_denseUnionArray() { + pool := memory.NewGoAllocator() + + denseBuilder := array.NewEmptyDenseUnionBuilder(pool) + defer denseBuilder.Release() + + i8Builder := array.NewInt8Builder(pool) + defer i8Builder.Release() + i8Code := denseBuilder.AppendChild(i8Builder, "i8") + + strBuilder := array.NewStringBuilder(pool) + defer strBuilder.Release() + strCode := denseBuilder.AppendChild(strBuilder, "str") + + f64Builder := array.NewFloat64Builder(pool) + defer f64Builder.Release() + f64Code := denseBuilder.AppendChild(f64Builder, "f64") + + values := []interface{}{int8(33), "abc", float64(1.0), float64(-1.0), nil, + "", int8(10), "def", int8(-10), float64(0.5)} + + for _, v := range values { + switch v := v.(type) { + case int8: + denseBuilder.Append(i8Code) + i8Builder.Append(v) + case string: + denseBuilder.Append(strCode) + strBuilder.Append(v) + case float64: + denseBuilder.Append(f64Code) + f64Builder.Append(v) + case nil: + denseBuilder.AppendNull() + } + } + + arr := denseBuilder.NewDenseUnionArray() + defer arr.Release() + + fmt.Printf("Len() = %d\n", arr.Len()) + fields := 
arr.UnionType().Fields() + offsets := arr.RawValueOffsets() + for i := 0; i < arr.Len(); i++ { + child := arr.ChildID(i) + data := arr.Field(child) + field := fields[child] + + idx := int(offsets[i]) + if data.IsNull(idx) { + fmt.Printf("[%d] = (null)\n", i) + continue + } + var v interface{} + switch varr := data.(type) { + case *array.Int8: + v = varr.Value(idx) + case *array.String: + v = varr.Value(idx) + case *array.Float64: + v = varr.Value(idx) + } + fmt.Printf("[%d] = %#5v {%s}\n", i, v, field.Name) + } + + fmt.Printf("i8: %s\n", arr.Field(0)) + fmt.Printf("str: %s\n", arr.Field(1)) + fmt.Printf("f64: %s\n", arr.Field(2)) + + // Output: + // Len() = 10 + // [0] = 33 {i8} + // [1] = "abc" {str} + // [2] = 1 {f64} + // [3] = -1 {f64} + // [4] = (null) + // [5] = "" {str} + // [6] = 10 {i8} + // [7] = "def" {str} + // [8] = -10 {i8} + // [9] = 0.5 {f64} + // i8: [33 (null) 10 -10] + // str: ["abc" "" "def"] + // f64: [1 -1 0.5] +} diff --git a/go/arrow/flight/basic_auth_flight_test.go b/go/arrow/flight/basic_auth_flight_test.go index bf6aab3feca54..78587799736b9 100755 --- a/go/arrow/flight/basic_auth_flight_test.go +++ b/go/arrow/flight/basic_auth_flight_test.go @@ -22,7 +22,7 @@ import ( "io" "testing" - "github.com/apache/arrow/go/v9/arrow/flight" + "github.com/apache/arrow/go/v10/arrow/flight" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials/insecure" diff --git a/go/arrow/flight/client.go b/go/arrow/flight/client.go index 3dfcb467a9106..1039f6d3e2505 100644 --- a/go/arrow/flight/client.go +++ b/go/arrow/flight/client.go @@ -26,7 +26,7 @@ import ( "strings" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow/flight/internal/flight" + "github.com/apache/arrow/go/v10/arrow/flight/internal/flight" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/metadata" diff --git a/go/arrow/flight/example_flight_server_test.go b/go/arrow/flight/example_flight_server_test.go index c790999eb957f..a37fbd700b333 100755 --- a/go/arrow/flight/example_flight_server_test.go +++ b/go/arrow/flight/example_flight_server_test.go @@ -23,7 +23,7 @@ import ( "io" "log" - "github.com/apache/arrow/go/v9/arrow/flight" + "github.com/apache/arrow/go/v10/arrow/flight" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials/insecure" diff --git a/go/arrow/flight/flight_middleware_test.go b/go/arrow/flight/flight_middleware_test.go index e189b12fc8e6a..b515528b2b511 100755 --- a/go/arrow/flight/flight_middleware_test.go +++ b/go/arrow/flight/flight_middleware_test.go @@ -23,8 +23,8 @@ import ( sync "sync" "testing" - "github.com/apache/arrow/go/v9/arrow/flight" - "github.com/apache/arrow/go/v9/arrow/internal/arrdata" + "github.com/apache/arrow/go/v10/arrow/flight" + "github.com/apache/arrow/go/v10/arrow/internal/arrdata" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "google.golang.org/grpc" diff --git a/go/arrow/flight/flight_test.go b/go/arrow/flight/flight_test.go index 5874394645c19..ccef08923084f 100755 --- a/go/arrow/flight/flight_test.go +++ b/go/arrow/flight/flight_test.go @@ -23,11 +23,11 @@ import ( "io" "testing" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/flight" - "github.com/apache/arrow/go/v9/arrow/internal/arrdata" - "github.com/apache/arrow/go/v9/arrow/ipc" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/flight" + 
"github.com/apache/arrow/go/v10/arrow/internal/arrdata" + "github.com/apache/arrow/go/v10/arrow/ipc" + "github.com/apache/arrow/go/v10/arrow/memory" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials/insecure" diff --git a/go/arrow/flight/flightsql/client.go b/go/arrow/flight/flightsql/client.go new file mode 100644 index 0000000000000..b8ee01cfdeaab --- /dev/null +++ b/go/arrow/flight/flightsql/client.go @@ -0,0 +1,574 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package flightsql + +import ( + "context" + "errors" + "io" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/flight" + pb "github.com/apache/arrow/go/v10/arrow/flight/internal/flight" + "github.com/apache/arrow/go/v10/arrow/ipc" + "github.com/apache/arrow/go/v10/arrow/memory" + "google.golang.org/grpc" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/anypb" +) + +// NewClient is a convenience function to automatically construct +// a flight.Client and return a flightsql.Client containing it rather +// than having to manually construct both yourself. It just delegates +// its arguments to flight.NewClientWithMiddleware to create the +// underlying Flight Client. +func NewClient(addr string, auth flight.ClientAuthHandler, middleware []flight.ClientMiddleware, opts ...grpc.DialOption) (*Client, error) { + cl, err := flight.NewClientWithMiddleware(addr, auth, middleware, opts...) + if err != nil { + return nil, err + } + return &Client{cl, memory.DefaultAllocator}, nil +} + +// Client wraps a regular Flight RPC Client to provide the FlightSQL +// interface functions and methods. +type Client struct { + Client flight.Client + + Alloc memory.Allocator +} + +func descForCommand(cmd proto.Message) (*flight.FlightDescriptor, error) { + var any anypb.Any + if err := any.MarshalFrom(cmd); err != nil { + return nil, err + } + + data, err := proto.Marshal(&any) + if err != nil { + return nil, err + } + return &flight.FlightDescriptor{ + Type: flight.DescriptorCMD, + Cmd: data, + }, nil +} + +func flightInfoForCommand(ctx context.Context, cl *Client, cmd proto.Message, opts ...grpc.CallOption) (*flight.FlightInfo, error) { + desc, err := descForCommand(cmd) + if err != nil { + return nil, err + } + return cl.getFlightInfo(ctx, desc, opts...) +} + +func schemaForCommand(ctx context.Context, cl *Client, cmd proto.Message, opts ...grpc.CallOption) (*flight.SchemaResult, error) { + desc, err := descForCommand(cmd) + if err != nil { + return nil, err + } + return cl.getSchema(ctx, desc, opts...) +} + +// Execute executes the desired query on the server and returns a FlightInfo +// object describing where to retrieve the results. 
+func (c *Client) Execute(ctx context.Context, query string, opts ...grpc.CallOption) (*flight.FlightInfo, error) { + cmd := pb.CommandStatementQuery{Query: query} + return flightInfoForCommand(ctx, c, &cmd, opts...) +} + +// GetExecuteSchema gets the schema of the result set of a query without +// executing the query itself. +func (c *Client) GetExecuteSchema(ctx context.Context, query string, opts ...grpc.CallOption) (*flight.SchemaResult, error) { + cmd := pb.CommandStatementQuery{Query: query} + return schemaForCommand(ctx, c, &cmd, opts...) +} + +// ExecuteUpdate is for executing an update query and only returns the number of affected rows. +func (c *Client) ExecuteUpdate(ctx context.Context, query string, opts ...grpc.CallOption) (n int64, err error) { + var ( + cmd pb.CommandStatementUpdate + desc *flight.FlightDescriptor + stream pb.FlightService_DoPutClient + res *pb.PutResult + updateResult pb.DoPutUpdateResult + ) + + cmd.Query = query + if desc, err = descForCommand(&cmd); err != nil { + return + } + + if stream, err = c.Client.DoPut(ctx, opts...); err != nil { + return + } + + if err = stream.Send(&flight.FlightData{FlightDescriptor: desc}); err != nil { + return + } + + if err = stream.CloseSend(); err != nil { + return + } + + if res, err = stream.Recv(); err != nil { + return + } + + if err = proto.Unmarshal(res.GetAppMetadata(), &updateResult); err != nil { + return + } + + return updateResult.GetRecordCount(), nil +} + +// GetCatalogs requests the list of catalogs from the server and +// returns a flightInfo object where the response can be retrieved +func (c *Client) GetCatalogs(ctx context.Context, opts ...grpc.CallOption) (*flight.FlightInfo, error) { + return flightInfoForCommand(ctx, c, &pb.CommandGetCatalogs{}, opts...) +} + +// GetCatalogsSchema requests the schema of GetCatalogs from the server +func (c *Client) GetCatalogsSchema(ctx context.Context, opts ...grpc.CallOption) (*flight.SchemaResult, error) { + return schemaForCommand(ctx, c, &pb.CommandGetCatalogs{}, opts...) +} + +// GetDBSchemas requests the list of schemas from the database and +// returns a FlightInfo object where the response can be retrieved +func (c *Client) GetDBSchemas(ctx context.Context, cmdOpts *GetDBSchemasOpts, opts ...grpc.CallOption) (*flight.FlightInfo, error) { + return flightInfoForCommand(ctx, c, (*pb.CommandGetDbSchemas)(cmdOpts), opts...) +} + +// GetDBSchemasSchema requests the schema of GetDBSchemas from the server +func (c *Client) GetDBSchemasSchema(ctx context.Context, opts ...grpc.CallOption) (*flight.SchemaResult, error) { + return schemaForCommand(ctx, c, &pb.CommandGetDbSchemas{}, opts...) +} + +// DoGet uses the provided flight ticket to request the stream of data. +// It returns a recordbatch reader to stream the results. Release +// should be called on the reader when done. +func (c *Client) DoGet(ctx context.Context, in *flight.Ticket, opts ...grpc.CallOption) (*flight.Reader, error) { + stream, err := c.Client.DoGet(ctx, in, opts...) + if err != nil { + return nil, err + } + + return flight.NewRecordReader(stream, ipc.WithAllocator(c.Alloc)) +} + +// GetTables requests a list of tables from the server, with the provided +// options describing how to make the request (filter patterns, if the schema +// should be returned, etc.). Returns a FlightInfo object where the response +// can be retrieved. 
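A hedged sketch of the ad-hoc query flow built from the methods above: Execute yields a FlightInfo whose endpoints carry tickets, and DoGet turns a ticket into a record stream. The query text and printing are assumptions, and the sketch assumes flight.Reader exposes the usual Next/Record/Release of the IPC reader.

package example

import (
	"context"
	"fmt"

	"github.com/apache/arrow/go/v10/arrow/flight"
	"github.com/apache/arrow/go/v10/arrow/flight/flightsql"
)

func runQuery(ctx context.Context, cl *flightsql.Client) error {
	info, err := cl.Execute(ctx, "SELECT 1")
	if err != nil {
		return err
	}
	// Each endpoint's ticket identifies part of the result set.
	for _, ep := range info.Endpoint {
		rdr, err := cl.DoGet(ctx, ep.Ticket)
		if err != nil {
			return err
		}
		for rdr.Next() {
			fmt.Println(rdr.Record()) // consume each record batch
		}
		rdr.Release()
	}
	return nil
}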
+func (c *Client) GetTables(ctx context.Context, reqOptions *GetTablesOpts, opts ...grpc.CallOption) (*flight.FlightInfo, error) {
+	return flightInfoForCommand(ctx, c, (*pb.CommandGetTables)(reqOptions), opts...)
+}
+
+// GetTablesSchema requests the schema of GetTables from the server.
+func (c *Client) GetTablesSchema(ctx context.Context, reqOptions *GetTablesOpts, opts ...grpc.CallOption) (*flight.SchemaResult, error) {
+	return schemaForCommand(ctx, c, (*pb.CommandGetTables)(reqOptions), opts...)
+}
+
+// GetPrimaryKeys requests the primary keys for a specific table from the
+// server, specified using a TableRef. Returns a FlightInfo object where
+// the response can be retrieved.
+func (c *Client) GetPrimaryKeys(ctx context.Context, ref TableRef, opts ...grpc.CallOption) (*flight.FlightInfo, error) {
+	cmd := pb.CommandGetPrimaryKeys{
+		Catalog:  ref.Catalog,
+		DbSchema: ref.DBSchema,
+		Table:    ref.Table,
+	}
+	return flightInfoForCommand(ctx, c, &cmd, opts...)
+}
+
+// GetPrimaryKeysSchema requests the schema of GetPrimaryKeys from the server.
+func (c *Client) GetPrimaryKeysSchema(ctx context.Context, opts ...grpc.CallOption) (*flight.SchemaResult, error) {
+	return schemaForCommand(ctx, c, &pb.CommandGetPrimaryKeys{}, opts...)
+}
+
+// GetExportedKeys retrieves a description of the foreign key columns
+// that reference the primary key columns of the specified table. Returns
+// a FlightInfo object where the response can be retrieved.
+func (c *Client) GetExportedKeys(ctx context.Context, ref TableRef, opts ...grpc.CallOption) (*flight.FlightInfo, error) {
+	cmd := pb.CommandGetExportedKeys{
+		Catalog:  ref.Catalog,
+		DbSchema: ref.DBSchema,
+		Table:    ref.Table,
+	}
+	return flightInfoForCommand(ctx, c, &cmd, opts...)
+}
+
+// GetExportedKeysSchema requests the schema of GetExportedKeys from the server.
+func (c *Client) GetExportedKeysSchema(ctx context.Context, opts ...grpc.CallOption) (*flight.SchemaResult, error) {
+	return schemaForCommand(ctx, c, &pb.CommandGetExportedKeys{}, opts...)
+}
+
+// GetImportedKeys returns the foreign key columns for the specified table.
+// Returns a FlightInfo object indicating where the response can be retrieved.
+func (c *Client) GetImportedKeys(ctx context.Context, ref TableRef, opts ...grpc.CallOption) (*flight.FlightInfo, error) {
+	cmd := pb.CommandGetImportedKeys{
+		Catalog:  ref.Catalog,
+		DbSchema: ref.DBSchema,
+		Table:    ref.Table,
+	}
+	return flightInfoForCommand(ctx, c, &cmd, opts...)
+}
+
+// GetImportedKeysSchema requests the schema of GetImportedKeys from the server.
+func (c *Client) GetImportedKeysSchema(ctx context.Context, opts ...grpc.CallOption) (*flight.SchemaResult, error) {
+	return schemaForCommand(ctx, c, &pb.CommandGetImportedKeys{}, opts...)
+}
+
+// GetCrossReference retrieves a description of the foreign key columns
+// in the specified ForeignKey table that reference the primary key or
+// columns representing a constraint of the parent table (could be the same
+// or a different table). Returns a FlightInfo object indicating where
+// the response can be retrieved with DoGet.
+func (c *Client) GetCrossReference(ctx context.Context, pkTable, fkTable TableRef, opts ...grpc.CallOption) (*flight.FlightInfo, error) {
+	cmd := pb.CommandGetCrossReference{
+		PkCatalog:  pkTable.Catalog,
+		PkDbSchema: pkTable.DBSchema,
+		PkTable:    pkTable.Table,
+		FkCatalog:  fkTable.Catalog,
+		FkDbSchema: fkTable.DBSchema,
+		FkTable:    fkTable.Table,
+	}
+	return flightInfoForCommand(ctx, c, &cmd, opts...)
+}
+
+// GetCrossReferenceSchema requests the schema of GetCrossReference from the server.
+func (c *Client) GetCrossReferenceSchema(ctx context.Context, opts ...grpc.CallOption) (*flight.SchemaResult, error) {
+	return schemaForCommand(ctx, c, &pb.CommandGetCrossReference{}, opts...)
+}
+
+// GetTableTypes requests a list of the types of tables available on this
+// server. Returns a FlightInfo object indicating where the response can
+// be retrieved.
+func (c *Client) GetTableTypes(ctx context.Context, opts ...grpc.CallOption) (*flight.FlightInfo, error) {
+	return flightInfoForCommand(ctx, c, &pb.CommandGetTableTypes{}, opts...)
+}
+
+// GetTableTypesSchema requests the schema of GetTableTypes from the server.
+func (c *Client) GetTableTypesSchema(ctx context.Context, opts ...grpc.CallOption) (*flight.SchemaResult, error) {
+	return schemaForCommand(ctx, c, &pb.CommandGetTableTypes{}, opts...)
+}
+
+// GetXdbcTypeInfo requests information about all supported data types
+// (dataType == nil) or about a specific data type. Returns a FlightInfo object
+// indicating where the response can be retrieved.
+func (c *Client) GetXdbcTypeInfo(ctx context.Context, dataType *int32, opts ...grpc.CallOption) (*flight.FlightInfo, error) {
+	return flightInfoForCommand(ctx, c, &pb.CommandGetXdbcTypeInfo{DataType: dataType}, opts...)
+}
+
+// GetXdbcTypeInfoSchema requests the schema of GetXdbcTypeInfo from the server.
+func (c *Client) GetXdbcTypeInfoSchema(ctx context.Context, opts ...grpc.CallOption) (*flight.SchemaResult, error) {
+	return schemaForCommand(ctx, c, &pb.CommandGetXdbcTypeInfo{}, opts...)
+}
+
+// GetSqlInfo returns a list of the requested SQL information corresponding
+// to the values in the info slice. Returns a FlightInfo object indicating
+// where the response can be retrieved.
+func (c *Client) GetSqlInfo(ctx context.Context, info []SqlInfo, opts ...grpc.CallOption) (*flight.FlightInfo, error) {
+	cmd := &pb.CommandGetSqlInfo{Info: make([]uint32, len(info))}
+
+	for i, v := range info {
+		cmd.Info[i] = uint32(v)
+	}
+	return flightInfoForCommand(ctx, c, cmd, opts...)
+}
+
+// GetSqlInfoSchema requests the schema of GetSqlInfo from the server.
+func (c *Client) GetSqlInfoSchema(ctx context.Context, opts ...grpc.CallOption) (*flight.SchemaResult, error) {
+	return schemaForCommand(ctx, c, &pb.CommandGetSqlInfo{}, opts...)
+}
+
+// Prepare creates a PreparedStatement object for the specified query.
+// The resulting PreparedStatement object should be Closed when no longer
+// needed. It maintains a reference to this Client, which is used to execute
+// the statement, and uses the specified allocator for any allocations it needs to perform.
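A hedged sketch of the prepared-statement lifecycle described above: Prepare, Execute, then Close to release the server-side handle. The query string and helper name are assumptions for illustration.

package example

import (
	"context"

	"github.com/apache/arrow/go/v10/arrow/flight"
	"github.com/apache/arrow/go/v10/arrow/flight/flightsql"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func prepareAndRun(ctx context.Context, cl *flightsql.Client) (*flight.FlightInfo, error) {
	stmt, err := cl.Prepare(ctx, memory.DefaultAllocator, "SELECT * FROM t WHERE id = 1")
	if err != nil {
		return nil, err
	}
	// Close releases any bound parameters and the server-side handle.
	defer stmt.Close(ctx)
	return stmt.Execute(ctx)
}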
+func (c *Client) Prepare(ctx context.Context, mem memory.Allocator, query string, opts ...grpc.CallOption) (prep *PreparedStatement, err error) { + const actionType = CreatePreparedStatementActionType + + var ( + cmd, cmdResult anypb.Any + res *pb.Result + request pb.ActionCreatePreparedStatementRequest + result pb.ActionCreatePreparedStatementResult + action pb.Action + stream pb.FlightService_DoActionClient + dsSchema, paramSchema *arrow.Schema + ) + + request.Query = query + if err = cmd.MarshalFrom(&request); err != nil { + return + } + + action.Type = actionType + if action.Body, err = proto.Marshal(&cmd); err != nil { + return + } + + if stream, err = c.Client.DoAction(ctx, &action, opts...); err != nil { + return + } + + if res, err = stream.Recv(); err != nil { + return + } + + if err = proto.Unmarshal(res.Body, &cmdResult); err != nil { + return + } + + if err = cmdResult.UnmarshalTo(&result); err != nil { + return + } + + if result.DatasetSchema != nil { + dsSchema, err = flight.DeserializeSchema(result.DatasetSchema, mem) + if err != nil { + return + } + } + if result.ParameterSchema != nil { + paramSchema, err = flight.DeserializeSchema(result.ParameterSchema, mem) + if err != nil { + return + } + } + + prep = &PreparedStatement{ + client: c, + opts: opts, + handle: result.PreparedStatementHandle, + datasetSchema: dsSchema, + paramSchema: paramSchema, + } + return +} + +func (c *Client) getFlightInfo(ctx context.Context, desc *flight.FlightDescriptor, opts ...grpc.CallOption) (*flight.FlightInfo, error) { + return c.Client.GetFlightInfo(ctx, desc, opts...) +} + +func (c *Client) getSchema(ctx context.Context, desc *flight.FlightDescriptor, opts ...grpc.CallOption) (*flight.SchemaResult, error) { + return c.Client.GetSchema(ctx, desc, opts...) +} + +// Close will close the underlying flight Client in use by this flightsql.Client +func (c *Client) Close() error { return c.Client.Close() } + +// PreparedStatement represents a constructed PreparedStatement on the server +// and maintains a reference to the Client that created it along with the +// prepared statement handle. +// +// If the server returned the Dataset Schema or Parameter Binding schemas +// at creation, they will also be accessible from this object. Close +// should be called when no longer needed. +type PreparedStatement struct { + client *Client + opts []grpc.CallOption + handle []byte + datasetSchema *arrow.Schema + paramSchema *arrow.Schema + paramBinding arrow.Record + closed bool +} + +// Execute executes the prepared statement on the server and returns a FlightInfo +// indicating where to retrieve the response. If SetParameters has been called +// then the parameter bindings will be sent before execution. +// +// Will error if already closed. +func (p *PreparedStatement) Execute(ctx context.Context) (*flight.FlightInfo, error) { + if p.closed { + return nil, errors.New("arrow/flightsql: prepared statement already closed") + } + + cmd := &pb.CommandPreparedStatementQuery{PreparedStatementHandle: p.handle} + + desc, err := descForCommand(cmd) + if err != nil { + return nil, err + } + + if p.paramBinding != nil && p.paramBinding.NumRows() > 0 { + pstream, err := p.client.Client.DoPut(ctx, p.opts...) 
+ if err != nil { + return nil, err + } + + wr := flight.NewRecordWriter(pstream, ipc.WithSchema(p.paramBinding.Schema())) + wr.SetFlightDescriptor(desc) + if err = wr.Write(p.paramBinding); err != nil { + return nil, err + } + if err = wr.Close(); err != nil { + return nil, err + } + pstream.CloseSend() + + // wait for the server to ack the result + if _, err = pstream.Recv(); err != nil && err != io.EOF { + return nil, err + } + } + + return p.client.getFlightInfo(ctx, desc, p.opts...) +} + +// ExecuteUpdate executes the prepared statement update query on the server +// and returns the number of rows affected. If SetParameters was called, +// the parameter bindings will be sent with the request to execute. +func (p *PreparedStatement) ExecuteUpdate(ctx context.Context) (nrecords int64, err error) { + if p.closed { + return 0, errors.New("arrow/flightsql: prepared statement already closed") + } + + var ( + execCmd = &pb.CommandPreparedStatementUpdate{PreparedStatementHandle: p.handle} + desc *flight.FlightDescriptor + pstream pb.FlightService_DoPutClient + wr *flight.Writer + res *pb.PutResult + updateResult pb.DoPutUpdateResult + ) + + desc, err = descForCommand(execCmd) + if err != nil { + return + } + + if pstream, err = p.client.Client.DoPut(ctx, p.opts...); err != nil { + return + } + if p.paramBinding != nil && p.paramBinding.NumRows() > 0 { + wr = flight.NewRecordWriter(pstream, ipc.WithSchema(p.paramBinding.Schema())) + wr.SetFlightDescriptor(desc) + if err = wr.Write(p.paramBinding); err != nil { + return + } + } else { + schema := arrow.NewSchema([]arrow.Field{}, nil) + wr = flight.NewRecordWriter(pstream, ipc.WithSchema(schema)) + wr.SetFlightDescriptor(desc) + rec := array.NewRecord(schema, []arrow.Array{}, 0) + if err = wr.Write(rec); err != nil { + return + } + } + + if err = wr.Close(); err != nil { + return + } + if err = pstream.CloseSend(); err != nil { + return + } + if res, err = pstream.Recv(); err != nil { + return + } + + if err = proto.Unmarshal(res.GetAppMetadata(), &updateResult); err != nil { + return + } + + return updateResult.GetRecordCount(), nil +} + +// DatasetSchema may be nil if the server did not return it when creating the +// Prepared Statement. +func (p *PreparedStatement) DatasetSchema() *arrow.Schema { return p.datasetSchema } + +// ParameterSchema may be nil if the server did not return it when creating +// the prepared statement. +func (p *PreparedStatement) ParameterSchema() *arrow.Schema { return p.paramSchema } + +// GetSchema re-requests the schema of the result set of the prepared +// statement from the server. It should otherwise be identical to DatasetSchema. +// +// Will error if already closed. +func (p *PreparedStatement) GetSchema(ctx context.Context) (*flight.SchemaResult, error) { + if p.closed { + return nil, errors.New("arrow/flightsql: prepared statement already closed") + } + + cmd := &pb.CommandPreparedStatementQuery{PreparedStatementHandle: p.handle} + + desc, err := descForCommand(cmd) + if err != nil { + return nil, err + } + + return p.client.getSchema(ctx, desc, p.opts...) +} + +// SetParameters takes a record batch to send as the parameter bindings when +// executing. It should match the schema from ParameterSchema. +// +// This will call Retain on the record to ensure it doesn't get released +// out from under the statement. Release will be called on a previous +// binding record if it existed, and will be called upon calling Close +// on the PreparedStatement. 
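A hedged sketch of parameter binding, mirroring the client test later in this patch: build a record matching ParameterSchema (here via array.RecordFromJSON) and hand it to SetParameters before Execute. The JSON payload and helper name are illustrative.

package example

import (
	"context"
	"strings"

	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/flight"
	"github.com/apache/arrow/go/v10/arrow/flight/flightsql"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func bindAndRun(ctx context.Context, stmt *flightsql.PreparedStatement) (*flight.FlightInfo, error) {
	// ParameterSchema may be nil if the server did not return one.
	rec, _, err := array.RecordFromJSON(memory.DefaultAllocator, stmt.ParameterSchema(),
		strings.NewReader(`[{"id": 1}]`))
	if err != nil {
		return nil, err
	}
	defer rec.Release()

	stmt.SetParameters(rec) // the statement retains rec until rebinding or Close
	return stmt.Execute(ctx)
}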
+func (p *PreparedStatement) SetParameters(binding arrow.Record) { + if p.paramBinding != nil { + p.paramBinding.Release() + p.paramBinding = nil + } + p.paramBinding = binding + p.paramBinding.Retain() +} + +// Close calls release on any parameter binding record and sends +// a ClosePreparedStatement action to the server. After calling +// Close, the PreparedStatement should not be used again. +func (p *PreparedStatement) Close(ctx context.Context) error { + if p.closed { + return errors.New("arrow/flightsql: already closed") + } + + if p.paramBinding != nil { + p.paramBinding.Release() + p.paramBinding = nil + } + + const actionType = ClosePreparedStatementActionType + var ( + cmd anypb.Any + request pb.ActionClosePreparedStatementRequest + ) + + request.PreparedStatementHandle = p.handle + if err := cmd.MarshalFrom(&request); err != nil { + return err + } + + body, err := proto.Marshal(&cmd) + if err != nil { + return err + } + + action := &flight.Action{Type: actionType, Body: body} + _, err = p.client.Client.DoAction(ctx, action, p.opts...) + if err != nil { + return err + } + + p.closed = true + return nil +} diff --git a/go/arrow/flight/flightsql/client_test.go b/go/arrow/flight/flightsql/client_test.go new file mode 100644 index 0000000000000..92e468313910c --- /dev/null +++ b/go/arrow/flight/flightsql/client_test.go @@ -0,0 +1,469 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
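Because Client exposes its underlying flight.Client and allocator as exported fields, an existing Flight connection can be wrapped directly instead of going through NewClient; the tests below rely on the same property to inject a mock. A hedged sketch, with the helper name as an assumption:

package example

import (
	"github.com/apache/arrow/go/v10/arrow/flight"
	"github.com/apache/arrow/go/v10/arrow/flight/flightsql"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

// wrapExisting reuses an already-dialed Flight client for FlightSQL calls.
func wrapExisting(fc flight.Client) *flightsql.Client {
	return &flightsql.Client{Client: fc, Alloc: memory.DefaultAllocator}
}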
+ +package flightsql_test + +import ( + "context" + "strings" + "testing" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/flight" + "github.com/apache/arrow/go/v10/arrow/flight/flightsql" + pb "github.com/apache/arrow/go/v10/arrow/flight/internal/flight" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" + "google.golang.org/grpc" + "google.golang.org/grpc/metadata" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/anypb" +) + +type mockGrpcClientStream struct { + mock.Mock +} + +func (m *mockGrpcClientStream) Header() (metadata.MD, error) { panic("unimplemented") } +func (m *mockGrpcClientStream) Trailer() metadata.MD { panic("unimplemented") } +func (m *mockGrpcClientStream) CloseSend() error { return m.Called().Error(0) } +func (m *mockGrpcClientStream) Context() context.Context { return context.TODO() } +func (m *mockGrpcClientStream) SendMsg(msg interface{}) error { return m.Called(msg).Error(0) } +func (m *mockGrpcClientStream) RecvMsg(msg interface{}) error { return m.Called(msg).Error(0) } + +type FlightServiceClientMock struct { + mock.Mock +} + +func (m *FlightServiceClientMock) Authenticate(_ context.Context, opts ...grpc.CallOption) error { + return m.Called(opts).Error(0) +} + +func (m *FlightServiceClientMock) AuthenticateBasicToken(_ context.Context, user, pass string, opts ...grpc.CallOption) (context.Context, error) { + args := m.Called(user, pass, opts) + return args.Get(0).(context.Context), args.Error(1) +} + +func (m *FlightServiceClientMock) Close() error { + return m.Called().Error(0) +} + +func (m *FlightServiceClientMock) Handshake(ctx context.Context, opts ...grpc.CallOption) (flight.FlightService_HandshakeClient, error) { + panic("not implemented") // TODO: Implement +} + +func (m *FlightServiceClientMock) ListFlights(ctx context.Context, in *flight.Criteria, opts ...grpc.CallOption) (flight.FlightService_ListFlightsClient, error) { + panic("not implemented") // TODO: Implement +} + +func (m *FlightServiceClientMock) GetFlightInfo(ctx context.Context, in *flight.FlightDescriptor, opts ...grpc.CallOption) (*flight.FlightInfo, error) { + args := m.Called(in.Type, in.Cmd, opts) + return args.Get(0).(*flight.FlightInfo), args.Error(1) +} + +func (m *FlightServiceClientMock) GetSchema(ctx context.Context, in *flight.FlightDescriptor, opts ...grpc.CallOption) (*flight.SchemaResult, error) { + panic("not implemented") // TODO: Implement +} + +func (m *FlightServiceClientMock) DoGet(ctx context.Context, in *flight.Ticket, opts ...grpc.CallOption) (flight.FlightService_DoGetClient, error) { + panic("not implemented") // TODO: Implement +} + +func (m *FlightServiceClientMock) DoPut(ctx context.Context, opts ...grpc.CallOption) (flight.FlightService_DoPutClient, error) { + args := m.Called(opts) + return args.Get(0).(flight.FlightService_DoPutClient), args.Error(1) +} + +func (m *FlightServiceClientMock) DoExchange(ctx context.Context, opts ...grpc.CallOption) (flight.FlightService_DoExchangeClient, error) { + panic("not implemented") // TODO: Implement +} + +func (m *FlightServiceClientMock) DoAction(ctx context.Context, in *flight.Action, opts ...grpc.CallOption) (flight.FlightService_DoActionClient, error) { + args := m.Called(in.Type, in.Body, opts) + return args.Get(0).(flight.FlightService_DoActionClient), args.Error(1) +} + +func (m *FlightServiceClientMock) ListActions(ctx 
context.Context, in *flight.Empty, opts ...grpc.CallOption) (flight.FlightService_ListActionsClient, error) { + panic("not implemented") // TODO: Implement +} + +type FlightSqlClientSuite struct { + suite.Suite + + mockClient FlightServiceClientMock + callOpts []grpc.CallOption + sqlClient flightsql.Client +} + +func getDesc(cmd proto.Message) *flight.FlightDescriptor { + var anycmd anypb.Any + anycmd.MarshalFrom(cmd) + + data, _ := proto.Marshal(&anycmd) + return &flight.FlightDescriptor{ + Type: flight.DescriptorCMD, + Cmd: data, + } +} + +func getAction(cmd proto.Message) *flight.Action { + var anycmd anypb.Any + anycmd.MarshalFrom(cmd) + + data, _ := proto.Marshal(&anycmd) + return &flight.Action{Body: data} +} + +func (s *FlightSqlClientSuite) SetupTest() { + s.mockClient = FlightServiceClientMock{} + s.sqlClient.Client = &s.mockClient +} + +func (s *FlightSqlClientSuite) TearDownTest() { + s.mockClient.AssertExpectations(s.T()) +} + +var emptyFlightInfo flight.FlightInfo + +func (s *FlightSqlClientSuite) TestGetCatalogs() { + var cmd pb.CommandGetCatalogs + desc := getDesc(&cmd) + + s.mockClient.On("GetFlightInfo", desc.Type, desc.Cmd, s.callOpts).Return(&emptyFlightInfo, nil) + info, err := s.sqlClient.GetCatalogs(context.Background(), s.callOpts...) + s.NoError(err) + s.Equal(&emptyFlightInfo, info) +} + +func (s *FlightSqlClientSuite) TestGetDBSchemas() { + var ( + schemaFilterPattern = "schema_filter_pattern" + catalog = "catalog" + ) + + cmd := &pb.CommandGetDbSchemas{ + Catalog: &catalog, + DbSchemaFilterPattern: &schemaFilterPattern, + } + desc := getDesc(cmd) + + s.mockClient.On("GetFlightInfo", desc.Type, desc.Cmd, s.callOpts).Return(&emptyFlightInfo, nil) + info, err := s.sqlClient.GetDBSchemas(context.Background(), (*flightsql.GetDBSchemasOpts)(cmd), s.callOpts...) + s.NoError(err) + s.Equal(&emptyFlightInfo, info) +} + +func (s *FlightSqlClientSuite) TestGetTables() { + var ( + catalog = "catalog" + schemaFilterPattern = "schema_filter_pattern" + tableNameFilterPattern = "table_name_filter_pattern" + includeSchema = true + tableTypes = []string{"type1", "type2"} + ) + + cmd := &pb.CommandGetTables{ + Catalog: &catalog, + DbSchemaFilterPattern: &schemaFilterPattern, + TableNameFilterPattern: &tableNameFilterPattern, + IncludeSchema: includeSchema, + TableTypes: tableTypes, + } + desc := getDesc(cmd) + s.mockClient.On("GetFlightInfo", desc.Type, desc.Cmd, s.callOpts).Return(&emptyFlightInfo, nil) + info, err := s.sqlClient.GetTables(context.Background(), (*flightsql.GetTablesOpts)(cmd), s.callOpts...) + s.NoError(err) + s.Equal(&emptyFlightInfo, info) +} + +func (s *FlightSqlClientSuite) TestGetTableTypes() { + var cmd pb.CommandGetTableTypes + desc := getDesc(&cmd) + + s.mockClient.On("GetFlightInfo", desc.Type, desc.Cmd, s.callOpts).Return(&emptyFlightInfo, nil) + info, err := s.sqlClient.GetTableTypes(context.Background(), s.callOpts...) + s.NoError(err) + s.Equal(&emptyFlightInfo, info) +} + +func (s *FlightSqlClientSuite) TestGetTypeInfo() { + var cmd pb.CommandGetXdbcTypeInfo + desc := getDesc(&cmd) + + s.mockClient.On("GetFlightInfo", desc.Type, desc.Cmd, s.callOpts).Return(&emptyFlightInfo, nil) + info, err := s.sqlClient.GetXdbcTypeInfo(context.Background(), nil, s.callOpts...) 
+ s.NoError(err) + s.Equal(&emptyFlightInfo, info) +} + +func (s *FlightSqlClientSuite) TestGetExported() { + var ( + catalog = "catalog" + schema = "schema" + table = "table" + ) + + cmd := &pb.CommandGetExportedKeys{ + Catalog: &catalog, + DbSchema: &schema, + Table: table, + } + desc := getDesc(cmd) + + s.mockClient.On("GetFlightInfo", desc.Type, desc.Cmd, s.callOpts).Return(&emptyFlightInfo, nil) + info, err := s.sqlClient.GetExportedKeys(context.Background(), flightsql.TableRef{&catalog, &schema, table}, s.callOpts...) + s.NoError(err) + s.Equal(&emptyFlightInfo, info) +} + +func (s *FlightSqlClientSuite) TestGetImported() { + var ( + schema = "schema" + table = "table" + ) + + cmd := &pb.CommandGetImportedKeys{ + DbSchema: &schema, + Table: table, + } + desc := getDesc(cmd) + + s.mockClient.On("GetFlightInfo", desc.Type, desc.Cmd, s.callOpts).Return(&emptyFlightInfo, nil) + info, err := s.sqlClient.GetImportedKeys(context.Background(), flightsql.TableRef{nil, &schema, table}, s.callOpts...) + s.NoError(err) + s.Equal(&emptyFlightInfo, info) +} + +func (s *FlightSqlClientSuite) TestGetPrimary() { + var ( + catalog = "catalog" + table = "table" + ) + + cmd := &pb.CommandGetPrimaryKeys{ + Catalog: &catalog, + Table: table, + } + desc := getDesc(cmd) + + s.mockClient.On("GetFlightInfo", desc.Type, desc.Cmd, s.callOpts).Return(&emptyFlightInfo, nil) + info, err := s.sqlClient.GetPrimaryKeys(context.Background(), flightsql.TableRef{&catalog, nil, table}, s.callOpts...) + s.NoError(err) + s.Equal(&emptyFlightInfo, info) +} + +func (s *FlightSqlClientSuite) TestGetCrossReference() { + var ( + pkCatalog = "pk_catalog" + pkSchema = "pk_schema" + pkTable = "pk_table" + fkCatalog = "fk_catalog" + fkSchema = "fk_schema" + fkTable = "fk_table" + ) + + cmd := &pb.CommandGetCrossReference{ + PkCatalog: &pkCatalog, + PkDbSchema: &pkSchema, + PkTable: pkTable, + FkCatalog: &fkCatalog, + FkDbSchema: &fkSchema, + FkTable: fkTable, + } + desc := getDesc(cmd) + + s.mockClient.On("GetFlightInfo", desc.Type, desc.Cmd, s.callOpts).Return(&emptyFlightInfo, nil) + info, err := s.sqlClient.GetCrossReference(context.Background(), + flightsql.TableRef{&pkCatalog, &pkSchema, pkTable}, + flightsql.TableRef{&fkCatalog, &fkSchema, fkTable}, s.callOpts...) + s.NoError(err) + s.Equal(&emptyFlightInfo, info) +} + +func (s *FlightSqlClientSuite) TestExecute() { + var query = "query" + + cmd := &pb.CommandStatementQuery{Query: query} + desc := getDesc(cmd) + + s.mockClient.On("GetFlightInfo", desc.Type, desc.Cmd, s.callOpts).Return(&emptyFlightInfo, nil) + info, err := s.sqlClient.Execute(context.Background(), query, s.callOpts...) 
+ s.NoError(err) + s.Equal(&emptyFlightInfo, info) +} + +type mockDoActionClient struct { + mockGrpcClientStream +} + +func (m *mockDoActionClient) Recv() (*pb.Result, error) { + args := m.Called() + return args.Get(0).(*pb.Result), args.Error(1) +} + +type mockDoPutClient struct { + mockGrpcClientStream +} + +func (m *mockDoPutClient) Send(fd *flight.FlightData) error { + return m.Called(fd).Error(0) +} + +func (m *mockDoPutClient) Recv() (*pb.PutResult, error) { + args := m.Called() + return args.Get(0).(*pb.PutResult), args.Error(1) +} + +func (s *FlightSqlClientSuite) TestPreparedStatementExecute() { + const query = "query" + + cmd := &pb.ActionCreatePreparedStatementRequest{Query: query} + action := getAction(cmd) + action.Type = flightsql.CreatePreparedStatementActionType + closeAct := getAction(&pb.ActionClosePreparedStatementRequest{PreparedStatementHandle: []byte(query)}) + closeAct.Type = flightsql.ClosePreparedStatementActionType + + rsp := &mockDoActionClient{} + defer rsp.AssertExpectations(s.T()) + + result := &pb.ActionCreatePreparedStatementResult{PreparedStatementHandle: []byte(query)} + var out anypb.Any + out.MarshalFrom(result) + data, _ := proto.Marshal(&out) + rsp.On("Recv").Return(&pb.Result{Body: data}, nil) + + s.mockClient.On("DoAction", flightsql.CreatePreparedStatementActionType, action.Body, s.callOpts). + Return(rsp, nil) + s.mockClient.On("DoAction", flightsql.ClosePreparedStatementActionType, closeAct.Body, s.callOpts). + Return(rsp, nil) + + infoCmd := &pb.CommandPreparedStatementQuery{PreparedStatementHandle: []byte(query)} + desc := getDesc(infoCmd) + s.mockClient.On("GetFlightInfo", desc.Type, desc.Cmd, s.callOpts).Return(&emptyFlightInfo, nil) + + prepared, err := s.sqlClient.Prepare(context.TODO(), memory.DefaultAllocator, query, s.callOpts...) 
+ s.NoError(err) + defer prepared.Close(context.TODO()) + + info, err := prepared.Execute(context.TODO()) + s.NoError(err) + s.Equal(&emptyFlightInfo, info) +} + +func (s *FlightSqlClientSuite) TestPreparedStatementExecuteParamBinding() { + const query = "query" + + // create and close actions + cmd := &pb.ActionCreatePreparedStatementRequest{Query: query} + action := getAction(cmd) + action.Type = flightsql.CreatePreparedStatementActionType + closeAct := getAction(&pb.ActionClosePreparedStatementRequest{PreparedStatementHandle: []byte(query)}) + closeAct.Type = flightsql.ClosePreparedStatementActionType + + // results from createprepared statement + result := &pb.ActionCreatePreparedStatementResult{ + PreparedStatementHandle: []byte(query), + } + schema := arrow.NewSchema([]arrow.Field{{Name: "id", Type: arrow.PrimitiveTypes.Int64, Nullable: true}}, nil) + result.ParameterSchema = flight.SerializeSchema(schema, memory.DefaultAllocator) + + // mocked client stream + var out anypb.Any + out.MarshalFrom(result) + data, _ := proto.Marshal(&out) + rsp := &mockDoActionClient{} + defer rsp.AssertExpectations(s.T()) + rsp.On("Recv").Return(&pb.Result{Body: data}, nil) + + // expect two actions: one to create and one to close the prepared statement + s.mockClient.On("DoAction", flightsql.CreatePreparedStatementActionType, action.Body, s.callOpts).Return(rsp, nil) + s.mockClient.On("DoAction", flightsql.ClosePreparedStatementActionType, closeAct.Body, s.callOpts).Return(rsp, nil) + + expectedDesc := getDesc(&pb.CommandPreparedStatementQuery{PreparedStatementHandle: []byte(query)}) + + // mocked client stream for DoPut + mockedPut := &mockDoPutClient{} + s.mockClient.On("DoPut", s.callOpts).Return(mockedPut, nil) + mockedPut.On("Send", mock.MatchedBy(func(fd *flight.FlightData) bool { + return proto.Equal(expectedDesc, fd.FlightDescriptor) + })).Return(nil).Twice() // first sends schema message, second sends data + mockedPut.On("CloseSend").Return(nil) + mockedPut.On("Recv").Return((*pb.PutResult)(nil), nil) + + infoCmd := &pb.CommandPreparedStatementQuery{PreparedStatementHandle: []byte(query)} + desc := getDesc(infoCmd) + s.mockClient.On("GetFlightInfo", desc.Type, desc.Cmd, s.callOpts).Return(&emptyFlightInfo, nil) + + prepared, err := s.sqlClient.Prepare(context.TODO(), memory.DefaultAllocator, query, s.callOpts...) + s.NoError(err) + defer prepared.Close(context.TODO()) + + paramSchema := prepared.ParameterSchema() + rec, _, err := array.RecordFromJSON(memory.DefaultAllocator, paramSchema, strings.NewReader(`[{"id": 1}]`)) + s.NoError(err) + defer rec.Release() + + prepared.SetParameters(rec) + info, err := prepared.Execute(context.TODO()) + s.NoError(err) + s.Equal(&emptyFlightInfo, info) +} + +func (s *FlightSqlClientSuite) TestExecuteUpdate() { + const query = "query" + + cmd := &pb.CommandStatementUpdate{Query: query} + desc := getDesc(cmd) + result := &pb.DoPutUpdateResult{RecordCount: 100} + resdata, _ := proto.Marshal(result) + + mockedPut := &mockDoPutClient{} + mockedPut.On("Send", mock.MatchedBy(func(fd *flight.FlightData) bool { + return proto.Equal(desc, fd.FlightDescriptor) + })).Return(nil) + mockedPut.On("CloseSend").Return(nil) + mockedPut.On("Recv").Return(&pb.PutResult{AppMetadata: resdata}, nil) + s.mockClient.On("DoPut", s.callOpts).Return(mockedPut, nil) + + num, err := s.sqlClient.ExecuteUpdate(context.TODO(), query, s.callOpts...) 
+ s.NoError(err) + s.EqualValues(100, num) +} + +func (s *FlightSqlClientSuite) TestGetSqlInfo() { + sqlInfo := []flightsql.SqlInfo{ + flightsql.SqlInfoFlightSqlServerName, + flightsql.SqlInfoFlightSqlServerVersion, + flightsql.SqlInfoFlightSqlServerArrowVersion, + } + + cmd := &pb.CommandGetSqlInfo{Info: make([]uint32, len(sqlInfo))} + for i, info := range sqlInfo { + cmd.Info[i] = uint32(info) + } + desc := getDesc(cmd) + + s.mockClient.On("GetFlightInfo", desc.Type, desc.Cmd, s.callOpts).Return(&emptyFlightInfo, nil) + info, err := s.sqlClient.GetSqlInfo(context.TODO(), sqlInfo, s.callOpts...) + s.NoError(err) + s.Equal(&emptyFlightInfo, info) +} + +func TestFlightSqlClient(t *testing.T) { + suite.Run(t, new(FlightSqlClientSuite)) +} diff --git a/go/arrow/flight/flightsql/column_metadata.go b/go/arrow/flight/flightsql/column_metadata.go new file mode 100644 index 0000000000000..6ad8030d9950f --- /dev/null +++ b/go/arrow/flight/flightsql/column_metadata.go @@ -0,0 +1,217 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package flightsql + +import ( + "strconv" + + "github.com/apache/arrow/go/v10/arrow" +) + +const ( + boolTrueStr = "1" + boolFalseStr = "0" +) + +func boolToStr(v bool) string { + if v { + return boolTrueStr + } + return boolFalseStr +} + +func strToBool(v string) bool { + return v == boolTrueStr +} + +// Metadata Key Constants +const ( + CatalogNameKey = "ARROW:FLIGHT:SQL:CATALOG_NAME" + SchemaNameKey = "ARROW:FLIGHT:SQL:SCHEMA_NAME" + TableNameKey = "ARROW:FLIGHT:SQL:TABLE_NAME" + TypeNameKey = "ARROW:FLIGHT:SQL:TYPE_NAME" + PrecisionKey = "ARROW:FLIGHT:SQL:PRECISION" + ScaleKey = "ARROW:FLIGHT:SQL:SCALE" + IsAutoIncrementKey = "ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT" + IsCaseSensitiveKey = "ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE" + IsReadOnlyKey = "ARROW:FLIGHT:SQL:IS_READ_ONLY" + IsSearchableKey = "ARROW:FLIGHT:SQL:IS_SEARCHABLE" +) + +// ColumnMetadata is a helper object for managing and querying the +// standard SQL Column metadata using the expected Metadata Keys. +// It can be created by just Wrapping an existing *arrow.Metadata. +// +// Each of the methods return a value and a boolean indicating if it +// was set in the metadata or not. 
+type ColumnMetadata struct {
+	Data *arrow.Metadata
+}
+
+func (c *ColumnMetadata) findStrVal(key string) (string, bool) {
+	idx := c.Data.FindKey(key)
+	if idx == -1 {
+		return "", false
+	}
+	return c.Data.Values()[idx], true
+}
+
+func (c *ColumnMetadata) findBoolVal(key string) (bool, bool) {
+	idx := c.Data.FindKey(key)
+	if idx == -1 {
+		return false, false
+	}
+	return strToBool(c.Data.Values()[idx]), true
+}
+
+func (c *ColumnMetadata) findInt32Val(key string) (int32, bool) {
+	idx := c.Data.FindKey(key)
+	if idx == -1 {
+		return 0, false
+	}
+	v, err := strconv.ParseInt(c.Data.Values()[idx], 10, 32)
+	if err != nil {
+		return 0, false
+	}
+	return int32(v), true
+}
+
+func (c *ColumnMetadata) CatalogName() (string, bool) {
+	return c.findStrVal(CatalogNameKey)
+}
+
+func (c *ColumnMetadata) SchemaName() (string, bool) {
+	return c.findStrVal(SchemaNameKey)
+}
+
+func (c *ColumnMetadata) TableName() (string, bool) {
+	return c.findStrVal(TableNameKey)
+}
+
+func (c *ColumnMetadata) TypeName() (string, bool) {
+	return c.findStrVal(TypeNameKey)
+}
+
+func (c *ColumnMetadata) Precision() (int32, bool) {
+	return c.findInt32Val(PrecisionKey)
+}
+
+func (c *ColumnMetadata) Scale() (int32, bool) {
+	return c.findInt32Val(ScaleKey)
+}
+
+func (c *ColumnMetadata) IsAutoIncrement() (bool, bool) {
+	return c.findBoolVal(IsAutoIncrementKey)
+}
+
+func (c *ColumnMetadata) IsCaseSensitive() (bool, bool) {
+	return c.findBoolVal(IsCaseSensitiveKey)
+}
+
+func (c *ColumnMetadata) IsReadOnly() (bool, bool) {
+	return c.findBoolVal(IsReadOnlyKey)
+}
+
+func (c *ColumnMetadata) IsSearchable() (bool, bool) {
+	return c.findBoolVal(IsSearchableKey)
+}
+
+// ColumnMetadataBuilder is a convenience builder for constructing
+// SQL column metadata using the expected standard metadata keys.
+// All methods return the builder itself so it can be chained
+// to easily construct a final metadata object.
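A minimal usage sketch for these helpers: metadata is assembled with the ColumnMetadataBuilder defined just below, attached to a field, and read back through ColumnMetadata, each getter reporting whether the key was present. The catalog, table, and type values here are purely illustrative.

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/flight/flightsql"
)

func main() {
	// Build the standard SQL column metadata with the chained builder.
	b := flightsql.NewColumnMetadataBuilder()
	md := b.CatalogName("main").TableName("intTable").TypeName("INTEGER").
		Precision(10).Scale(0).IsAutoIncrement(true).Metadata()

	// Attach it to a field, as the sqlite example server does for its columns.
	field := arrow.Field{Name: "id", Type: arrow.PrimitiveTypes.Int64, Metadata: md}

	// Read it back through the ColumnMetadata wrapper; each getter reports
	// whether the key was actually set in the metadata.
	cm := flightsql.ColumnMetadata{Data: &field.Metadata}
	if tbl, ok := cm.TableName(); ok {
		fmt.Println("table:", tbl)
	}
	if prec, ok := cm.Precision(); ok {
		fmt.Println("precision:", prec)
	}
}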
+type ColumnMetadataBuilder struct { + keys, vals []string +} + +func NewColumnMetadataBuilder() *ColumnMetadataBuilder { + return &ColumnMetadataBuilder{make([]string, 0), make([]string, 0)} +} + +func (c *ColumnMetadataBuilder) Clear() { + c.keys = c.keys[:0] + c.vals = c.vals[:0] +} + +func (c *ColumnMetadataBuilder) Build() ColumnMetadata { + md := c.Metadata() + return ColumnMetadata{&md} +} + +func (c *ColumnMetadataBuilder) Metadata() arrow.Metadata { + return arrow.NewMetadata(c.keys, c.vals) +} + +func (c *ColumnMetadataBuilder) CatalogName(name string) *ColumnMetadataBuilder { + c.keys = append(c.keys, CatalogNameKey) + c.vals = append(c.vals, name) + return c +} + +func (c *ColumnMetadataBuilder) SchemaName(name string) *ColumnMetadataBuilder { + c.keys = append(c.keys, SchemaNameKey) + c.vals = append(c.vals, name) + return c +} + +func (c *ColumnMetadataBuilder) TableName(name string) *ColumnMetadataBuilder { + c.keys = append(c.keys, TableNameKey) + c.vals = append(c.vals, name) + return c +} + +func (c *ColumnMetadataBuilder) TypeName(name string) *ColumnMetadataBuilder { + c.keys = append(c.keys, TypeNameKey) + c.vals = append(c.vals, name) + return c +} + +func (c *ColumnMetadataBuilder) Precision(prec int32) *ColumnMetadataBuilder { + c.keys = append(c.keys, PrecisionKey) + c.vals = append(c.vals, strconv.Itoa(int(prec))) + return c +} + +func (c *ColumnMetadataBuilder) Scale(prec int32) *ColumnMetadataBuilder { + c.keys = append(c.keys, ScaleKey) + c.vals = append(c.vals, strconv.Itoa(int(prec))) + return c +} + +func (c *ColumnMetadataBuilder) IsAutoIncrement(v bool) *ColumnMetadataBuilder { + c.keys = append(c.keys, IsAutoIncrementKey) + c.vals = append(c.vals, boolToStr(v)) + return c +} + +func (c *ColumnMetadataBuilder) IsCaseSensitive(v bool) *ColumnMetadataBuilder { + c.keys = append(c.keys, IsCaseSensitiveKey) + c.vals = append(c.vals, boolToStr(v)) + return c +} + +func (c *ColumnMetadataBuilder) IsReadOnly(v bool) *ColumnMetadataBuilder { + c.keys = append(c.keys, IsReadOnlyKey) + c.vals = append(c.vals, boolToStr(v)) + return c +} + +func (c *ColumnMetadataBuilder) IsSearchable(v bool) *ColumnMetadataBuilder { + c.keys = append(c.keys, IsSearchableKey) + c.vals = append(c.vals, boolToStr(v)) + return c +} diff --git a/go/arrow/flight/flightsql/example/sql_batch_reader.go b/go/arrow/flight/flightsql/example/sql_batch_reader.go new file mode 100644 index 0000000000000..92f32971e2705 --- /dev/null +++ b/go/arrow/flight/flightsql/example/sql_batch_reader.go @@ -0,0 +1,285 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build go1.18 +// +build go1.18 + +package example + +import ( + "database/sql" + "reflect" + "strings" + "sync/atomic" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/flight/flightsql" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" +) + +func getArrowTypeFromString(dbtype string) arrow.DataType { + dbtype = strings.ToLower(dbtype) + if strings.HasPrefix(dbtype, "varchar") { + return arrow.BinaryTypes.String + } + + switch dbtype { + case "int", "integer": + return arrow.PrimitiveTypes.Int64 + case "real": + return arrow.PrimitiveTypes.Float64 + case "blob": + return arrow.BinaryTypes.Binary + case "text", "date", "char": + return arrow.BinaryTypes.String + default: + panic("invalid sqlite type: " + dbtype) + } +} + +func getArrowType(c *sql.ColumnType) arrow.DataType { + dbtype := strings.ToLower(c.DatabaseTypeName()) + if dbtype == "" { + switch c.ScanType().Kind() { + case reflect.Int, reflect.Int64, reflect.Uint64: + return arrow.PrimitiveTypes.Int64 + case reflect.Float32, reflect.Float64: + return arrow.PrimitiveTypes.Float64 + } + } + return getArrowTypeFromString(dbtype) +} + +const maxBatchSize = 1024 + +type SqlBatchReader struct { + refCount int64 + + schema *arrow.Schema + rows *sql.Rows + record arrow.Record + bldr *array.RecordBuilder + err error + + rowdest []interface{} +} + +func NewSqlBatchReaderWithSchema(mem memory.Allocator, schema *arrow.Schema, rows *sql.Rows) (*SqlBatchReader, error) { + rowdest := make([]interface{}, len(schema.Fields())) + for i, f := range schema.Fields() { + switch f.Type.ID() { + case arrow.UINT8: + if f.Nullable { + rowdest[i] = &sql.NullInt32{} + } else { + rowdest[i] = new(uint8) + } + case arrow.INT32: + if f.Nullable { + rowdest[i] = &sql.NullInt32{} + } else { + rowdest[i] = new(int32) + } + case arrow.INT64: + if f.Nullable { + rowdest[i] = &sql.NullInt64{} + } else { + rowdest[i] = new(int64) + } + case arrow.FLOAT64: + if f.Nullable { + rowdest[i] = &sql.NullFloat64{} + } else { + rowdest[i] = new(float64) + } + case arrow.BINARY: + var b []byte + rowdest[i] = &b + case arrow.STRING: + if f.Nullable { + rowdest[i] = &sql.NullString{} + } else { + rowdest[i] = new(string) + } + } + } + + return &SqlBatchReader{ + refCount: 1, + bldr: array.NewRecordBuilder(mem, schema), + schema: schema, + rowdest: rowdest, + rows: rows}, nil +} + +func NewSqlBatchReader(mem memory.Allocator, rows *sql.Rows) (*SqlBatchReader, error) { + bldr := flightsql.NewColumnMetadataBuilder() + + cols, err := rows.ColumnTypes() + if err != nil { + rows.Close() + return nil, err + } + + rowdest := make([]interface{}, len(cols)) + fields := make([]arrow.Field, len(cols)) + for i, c := range cols { + fields[i].Name = c.Name() + fields[i].Nullable, _ = c.Nullable() + fields[i].Type = getArrowType(c) + fields[i].Metadata = getColumnMetadata(bldr, getSqlTypeFromTypeName(c.DatabaseTypeName()), "") + switch fields[i].Type.ID() { + case arrow.UINT8: + if fields[i].Nullable { + rowdest[i] = &sql.NullInt32{} + } else { + rowdest[i] = new(uint8) + } + case arrow.INT32: + if fields[i].Nullable { + rowdest[i] = &sql.NullInt32{} + } else { + rowdest[i] = new(int32) + } + case arrow.INT64: + if fields[i].Nullable { + rowdest[i] = &sql.NullInt64{} + } else { + rowdest[i] = new(int64) + } + case arrow.FLOAT64: + if fields[i].Nullable { + rowdest[i] = &sql.NullFloat64{} + } else { + rowdest[i] = new(float64) + } + case arrow.BINARY: + var b 
[]byte + rowdest[i] = &b + case arrow.STRING: + if fields[i].Nullable { + rowdest[i] = &sql.NullString{} + } else { + rowdest[i] = new(string) + } + } + } + + schema := arrow.NewSchema(fields, nil) + return &SqlBatchReader{ + refCount: 1, + bldr: array.NewRecordBuilder(mem, schema), + schema: schema, + rowdest: rowdest, + rows: rows}, nil +} + +func (r *SqlBatchReader) Retain() { + atomic.AddInt64(&r.refCount, 1) +} + +func (r *SqlBatchReader) Release() { + debug.Assert(atomic.LoadInt64(&r.refCount) > 0, "too many releases") + + if atomic.AddInt64(&r.refCount, -1) == 0 { + r.rows.Close() + r.rows, r.schema, r.rowdest = nil, nil, nil + r.bldr.Release() + r.bldr = nil + if r.record != nil { + r.record.Release() + r.record = nil + } + } +} +func (r *SqlBatchReader) Schema() *arrow.Schema { return r.schema } + +func (r *SqlBatchReader) Record() arrow.Record { return r.record } + +func (r *SqlBatchReader) Err() error { return r.err } + +func (r *SqlBatchReader) Next() bool { + if r.record != nil { + r.record.Release() + r.record = nil + } + + rows := 0 + for rows < maxBatchSize && r.rows.Next() { + if err := r.rows.Scan(r.rowdest...); err != nil { + r.err = err + return false + } + + for i, v := range r.rowdest { + fb := r.bldr.Field(i) + switch v := v.(type) { + case *uint8: + fb.(*array.Uint8Builder).Append(*v) + case *int64: + fb.(*array.Int64Builder).Append(*v) + case *sql.NullInt64: + if !v.Valid { + fb.AppendNull() + } else { + fb.(*array.Int64Builder).Append(v.Int64) + } + case *int32: + fb.(*array.Int32Builder).Append(*v) + case *sql.NullInt32: + if !v.Valid { + fb.AppendNull() + } else { + switch b := fb.(type) { + case *array.Int32Builder: + b.Append(v.Int32) + case *array.Uint8Builder: + b.Append(uint8(v.Int32)) + } + } + case *float64: + fb.(*array.Float64Builder).Append(*v) + case *sql.NullFloat64: + if !v.Valid { + fb.AppendNull() + } else { + fb.(*array.Float64Builder).Append(v.Float64) + } + case *[]byte: + if v == nil { + fb.AppendNull() + } else { + fb.(*array.BinaryBuilder).Append(*v) + } + case *string: + fb.(*array.StringBuilder).Append(*v) + case *sql.NullString: + if !v.Valid { + fb.AppendNull() + } else { + fb.(*array.StringBuilder).Append(v.String) + } + } + } + + rows++ + } + + r.record = r.bldr.NewRecord() + return rows > 0 +} diff --git a/go/arrow/flight/flightsql/example/sqlite_info.go b/go/arrow/flight/flightsql/example/sqlite_info.go new file mode 100644 index 0000000000000..e8395a6b33cf1 --- /dev/null +++ b/go/arrow/flight/flightsql/example/sqlite_info.go @@ -0,0 +1,199 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
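A short sketch of how the SqlBatchReader above might be driven directly, assuming the example package import path github.com/apache/arrow/go/v10/arrow/flight/flightsql/example and the modernc.org/sqlite driver used elsewhere in this patch. The reader infers the Arrow schema from the driver's column types and emits records of up to 1024 rows.

package main

import (
	"database/sql"
	"fmt"
	"log"

	"github.com/apache/arrow/go/v10/arrow/flight/flightsql/example"
	"github.com/apache/arrow/go/v10/arrow/memory"
	_ "modernc.org/sqlite"
)

func main() {
	db, err := sql.Open("sqlite", ":memory:")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Illustrative table; the reader maps INTEGER to int64 and varchar to string.
	if _, err := db.Exec(`CREATE TABLE t (id INTEGER, name varchar(10));
		INSERT INTO t VALUES (1, 'a'), (2, 'b');`); err != nil {
		log.Fatal(err)
	}

	rows, err := db.Query("SELECT id, name FROM t")
	if err != nil {
		log.Fatal(err)
	}

	// NewSqlBatchReader infers the Arrow schema from the column types and
	// batches up to maxBatchSize rows per record; Release closes the rows.
	rdr, err := example.NewSqlBatchReader(memory.DefaultAllocator, rows)
	if err != nil {
		log.Fatal(err)
	}
	defer rdr.Release()

	for rdr.Next() {
		rec := rdr.Record()
		fmt.Println("rows in batch:", rec.NumRows())
		for i, col := range rec.Columns() {
			fmt.Printf("  %s: %v\n", rec.ColumnName(i), col)
		}
	}
	if err := rdr.Err(); err != nil {
		log.Fatal(err)
	}
}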
+ +//go:build go1.18 +// +build go1.18 + +package example + +import ( + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/flight/flightsql" +) + +func SqlInfoResultMap() flightsql.SqlInfoResultMap { + return flightsql.SqlInfoResultMap{ + uint32(flightsql.SqlInfoFlightSqlServerName): "db_name", + uint32(flightsql.SqlInfoFlightSqlServerVersion): "sqlite 3", + uint32(flightsql.SqlInfoFlightSqlServerArrowVersion): arrow.PkgVersion, + uint32(flightsql.SqlInfoFlightSqlServerReadOnly): false, + uint32(flightsql.SqlInfoDDLCatalog): false, + uint32(flightsql.SqlInfoDDLSchema): false, + uint32(flightsql.SqlInfoDDLTable): true, + uint32(flightsql.SqlInfoIdentifierCase): int64(flightsql.SqlCaseSensitivityCaseInsensitive), + uint32(flightsql.SqlInfoIdentifierQuoteChar): `"`, + uint32(flightsql.SqlInfoQuotedIdentifierCase): int64(flightsql.SqlCaseSensitivityCaseInsensitive), + uint32(flightsql.SqlInfoAllTablesAreASelectable): true, + uint32(flightsql.SqlInfoNullOrdering): int64(flightsql.SqlNullOrderingSortAtStart), + uint32(flightsql.SqlInfoKeywords): []string{"ABORT", + "ACTION", + "ADD", + "AFTER", + "ALL", + "ALTER", + "ALWAYS", + "ANALYZE", + "AND", + "AS", + "ASC", + "ATTACH", + "AUTOINCREMENT", + "BEFORE", + "BEGIN", + "BETWEEN", + "BY", + "CASCADE", + "CASE", + "CAST", + "CHECK", + "COLLATE", + "COLUMN", + "COMMIT", + "CONFLICT", + "CONSTRAINT", + "CREATE", + "CROSS", + "CURRENT", + "CURRENT_DATE", + "CURRENT_TIME", + "CURRENT_TIMESTAMP", + "DATABASE", + "DEFAULT", + "DEFERRABLE", + "DEFERRED", + "DELETE", + "DESC", + "DETACH", + "DISTINCT", + "DO", + "DROP", + "EACH", + "ELSE", + "END", + "ESCAPE", + "EXCEPT", + "EXCLUDE", + "EXCLUSIVE", + "EXISTS", + "EXPLAIN", + "FAIL", + "FILTER", + "FIRST", + "FOLLOWING", + "FOR", + "FOREIGN", + "FROM", + "FULL", + "GENERATED", + "GLOB", + "GROUP", + "GROUPS", + "HAVING", + "IF", + "IGNORE", + "IMMEDIATE", + "IN", + "INDEX", + "INDEXED", + "INITIALLY", + "INNER", + "INSERT", + "INSTEAD", + "INTERSECT", + "INTO", + "IS", + "ISNULL", + "JOIN", + "KEY", + "LAST", + "LEFT", + "LIKE", + "LIMIT", + "MATCH", + "MATERIALIZED", + "NATURAL", + "NO", + "NOT", + "NOTHING", + "NOTNULL", + "NULL", + "NULLS", + "OF", + "OFFSET", + "ON", + "OR", + "ORDER", + "OTHERS", + "OUTER", + "OVER", + "PARTITION", + "PLAN", + "PRAGMA", + "PRECEDING", + "PRIMARY", + "QUERY", + "RAISE", + "RANGE", + "RECURSIVE", + "REFERENCES", + "REGEXP", + "REINDEX", + "RELEASE", + "RENAME", + "REPLACE", + "RESTRICT", + "RETURNING", + "RIGHT", + "ROLLBACK", + "ROW", + "ROWS", + "SAVEPOINT", + "SELECT", + "SET", + "TABLE", + "TEMP", + "TEMPORARY", + "THEN", + "TIES", + "TO", + "TRANSACTION", + "TRIGGER", + "UNBOUNDED", + "UNION", + "UNIQUE", + "UPDATE", + "USING", + "VACUUM", + "VALUES", + "VIEW", + "VIRTUAL", + "WHEN", + "WHERE", + "WINDOW", + "WITH", + "WITHOUT"}, + uint32(flightsql.SqlInfoNumericFunctions): []string{ + "ACOS", "ACOSH", "ASIN", "ASINH", "ATAN", "ATAN2", "ATANH", "CEIL", + "CEILING", "COS", "COSH", "DEGREES", "EXP", "FLOOR", "LN", "LOG", + "LOG10", "LOG2", "MOD", "PI", "POW", "POWER", "RADIANS", + "SIN", "SINH", "SQRT", "TAN", "TANH", "TRUNC"}, + uint32(flightsql.SqlInfoStringFunctions): []string{"SUBSTR", "TRIM", "LTRIM", "RTRIM", "LENGTH", + "REPLACE", "UPPER", "LOWER", "INSTR"}, + uint32(flightsql.SqlInfoSupportsConvert): map[int32][]int32{ + int32(flightsql.SqlConvertBigInt): {int32(flightsql.SqlConvertInteger)}, + }, + } +} diff --git a/go/arrow/flight/flightsql/example/sqlite_server.go b/go/arrow/flight/flightsql/example/sqlite_server.go new file 
mode 100644 index 0000000000000..0d4c4ea99da37 --- /dev/null +++ b/go/arrow/flight/flightsql/example/sqlite_server.go @@ -0,0 +1,570 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build go1.18 +// +build go1.18 + +// Package example contains a FlightSQL Server implementation using +// sqlite as the backing engine. +// +// In order to ensure portability we'll use modernc.org/sqlite instead +// of github.com/mattn/go-sqlite3 because modernc is a translation of the +// SQLite source into Go, such that it doesn't require CGO to run and +// doesn't need to link against the actual libsqlite3 libraries. This way +// we don't require CGO or libsqlite3 to run this example or the tests. +// +// That said, since both implement in terms of Go's standard database/sql +// package, it's easy to swap them out if desired as the modernc.org/sqlite +// package is slower than go-sqlite3. +// +// One other important note is that modernc.org/sqlite only works +// correctly (specifically pragma_table_info) in go 1.18+ so this +// entire package is given the build constraint to only build when +// using go1.18 or higher +package example + +import ( + "context" + "database/sql" + "fmt" + "math/rand" + "strings" + "sync" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/flight" + "github.com/apache/arrow/go/v10/arrow/flight/flightsql" + "github.com/apache/arrow/go/v10/arrow/flight/flightsql/schema_ref" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/scalar" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + _ "modernc.org/sqlite" +) + +func genRandomString() []byte { + const length = 16 + max := int('z') + min := int('0') + + out := make([]byte, length) + for i := range out { + out[i] = byte(rand.Intn(max-min+1) + min) + } + return out +} + +func prepareQueryForGetTables(cmd flightsql.GetTables) string { + var b strings.Builder + b.WriteString(`SELECT null AS catalog_name, null AS schema_name, + name AS table_name, type AS table_type FROM sqlite_master WHERE 1=1`) + + if cmd.GetCatalog() != nil { + b.WriteString(" and catalog_name = '") + b.WriteString(*cmd.GetCatalog()) + b.WriteByte('\'') + } + + if cmd.GetDBSchemaFilterPattern() != nil { + b.WriteString(" and schema_name LIKE '") + b.WriteString(*cmd.GetDBSchemaFilterPattern()) + b.WriteByte('\'') + } + + if cmd.GetTableNameFilterPattern() != nil { + b.WriteString(" and table_name LIKE '") + b.WriteString(*cmd.GetTableNameFilterPattern()) + b.WriteByte('\'') + } + + if len(cmd.GetTableTypes()) > 0 { + b.WriteString(" and table_type IN (") + for i, t := range cmd.GetTableTypes() { + if i != 0 { + b.WriteByte(',') + } + fmt.Fprintf(&b, "'%s'", t) + } + b.WriteByte(')') + } 
+ + b.WriteString(" order by table_name") + return b.String() +} + +func prepareQueryForGetKeys(filter string) string { + return `SELECT * FROM ( + SELECT + NULL AS pk_catalog_name, + NULL AS pk_schema_name, + p."table" AS pk_table_name, + p."to" AS pk_column_name, + NULL AS fk_catalog_name, + NULL AS fk_schema_name, + m.name AS fk_table_name, + p."from" AS fk_column_name, + p.seq AS key_sequence, + NULL AS pk_key_name, + NULL AS fk_key_name, + CASE + WHEN p.on_update = 'CASCADE' THEN 0 + WHEN p.on_update = 'RESTRICT' THEN 1 + WHEN p.on_update = 'SET NULL' THEN 2 + WHEN p.on_update = 'NO ACTION' THEN 3 + WHEN p.on_update = 'SET DEFAULT' THEN 4 + END AS update_rule, + CASE + WHEN p.on_delete = 'CASCADE' THEN 0 + WHEN p.on_delete = 'RESTRICT' THEN 1 + WHEN p.on_delete = 'SET NULL' THEN 2 + WHEN p.on_delete = 'NO ACTION' THEN 3 + WHEN p.on_delete = 'SET DEFAULT' THEN 4 + END AS delete_rule + FROM sqlite_master m + JOIN pragma_foreign_key_list(m.name) p ON m.name != p."table" + WHERE m.type = 'table') WHERE ` + filter + + ` ORDER BY pk_catalog_name, pk_schema_name, pk_table_name, pk_key_name, key_sequence` +} + +type Statement struct { + stmt *sql.Stmt + params []interface{} +} + +type SQLiteFlightSQLServer struct { + flightsql.BaseServer + db *sql.DB + + prepared sync.Map +} + +func NewSQLiteFlightSQLServer() (*SQLiteFlightSQLServer, error) { + db, err := sql.Open("sqlite", ":memory:") + if err != nil { + return nil, err + } + + _, err = db.Exec(` + CREATE TABLE foreignTable ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + foreignName varchar(100), + value int); + + CREATE TABLE intTable ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + keyName varchar(100), + value int, + foreignId int references foreignTable(id)); + + INSERT INTO foreignTable (foreignName, value) VALUES ('keyOne', 1); + INSERT INTO foreignTable (foreignName, value) VALUES ('keyTwo', 0); + INSERT INTO foreignTable (foreignName, value) VALUES ('keyThree', -1); + INSERT INTO intTable (keyName, value, foreignId) VALUES ('one', 1, 1); + INSERT INTO intTable (keyName, value, foreignId) VALUES ('zero', 0, 1); + INSERT INTO intTable (keyName, value, foreignId) VALUES ('negative one', -1, 1); + INSERT INTO intTable (keyName, value, foreignId) VALUES (NULL, NULL, NULL); + `) + + if err != nil { + return nil, err + } + ret := &SQLiteFlightSQLServer{db: db} + for k, v := range SqlInfoResultMap() { + ret.RegisterSqlInfo(flightsql.SqlInfo(k), v) + } + return ret, nil +} + +func (s *SQLiteFlightSQLServer) flightInfoForCommand(desc *flight.FlightDescriptor, schema *arrow.Schema) *flight.FlightInfo { + return &flight.FlightInfo{ + Endpoint: []*flight.FlightEndpoint{{Ticket: &flight.Ticket{Ticket: desc.Cmd}}}, + FlightDescriptor: desc, + Schema: flight.SerializeSchema(schema, s.Alloc), + TotalRecords: -1, + TotalBytes: -1, + } +} + +func (s *SQLiteFlightSQLServer) GetFlightInfoStatement(ctx context.Context, cmd flightsql.StatementQuery, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + query := cmd.GetQuery() + tkt, err := flightsql.CreateStatementQueryTicket([]byte(query)) + if err != nil { + return nil, err + } + + return &flight.FlightInfo{ + Endpoint: []*flight.FlightEndpoint{{Ticket: &flight.Ticket{Ticket: tkt}}}, + FlightDescriptor: desc, + TotalRecords: -1, + TotalBytes: -1, + }, nil +} + +func (s *SQLiteFlightSQLServer) DoGetStatement(ctx context.Context, cmd flightsql.StatementQueryTicket) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return doGetQuery(ctx, s.Alloc, s.db, string(cmd.GetStatementHandle()), nil) +} + +func 
(s *SQLiteFlightSQLServer) GetFlightInfoCatalogs(_ context.Context, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return s.flightInfoForCommand(desc, schema_ref.Catalogs), nil +} + +func (s *SQLiteFlightSQLServer) DoGetCatalogs(context.Context) (*arrow.Schema, <-chan flight.StreamChunk, error) { + // sqlite doesn't support catalogs, this returns an empty record batch + schema := schema_ref.Catalogs + + ch := make(chan flight.StreamChunk) + close(ch) + + return schema, ch, nil +} + +func (s *SQLiteFlightSQLServer) GetFlightInfoSchemas(_ context.Context, cmd flightsql.GetDBSchemas, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return s.flightInfoForCommand(desc, schema_ref.DBSchemas), nil +} + +func (s *SQLiteFlightSQLServer) DoGetDBSchemas(context.Context, flightsql.GetDBSchemas) (*arrow.Schema, <-chan flight.StreamChunk, error) { + // sqlite doesn't support schemas, this returns an empty record batch + schema := schema_ref.DBSchemas + + ch := make(chan flight.StreamChunk) + close(ch) + + return schema, ch, nil +} + +func (s *SQLiteFlightSQLServer) GetFlightInfoTables(_ context.Context, cmd flightsql.GetTables, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + schema := schema_ref.Tables + if cmd.GetIncludeSchema() { + schema = schema_ref.TablesWithIncludedSchema + } + return s.flightInfoForCommand(desc, schema), nil +} + +func (s *SQLiteFlightSQLServer) DoGetTables(ctx context.Context, cmd flightsql.GetTables) (*arrow.Schema, <-chan flight.StreamChunk, error) { + query := prepareQueryForGetTables(cmd) + + rows, err := s.db.QueryContext(ctx, query) + if err != nil { + return nil, nil, err + } + + var rdr array.RecordReader + + rdr, err = NewSqlBatchReaderWithSchema(s.Alloc, schema_ref.Tables, rows) + if err != nil { + return nil, nil, err + } + + ch := make(chan flight.StreamChunk, 2) + if cmd.GetIncludeSchema() { + rdr, err = NewSqliteTablesSchemaBatchReader(ctx, s.Alloc, rdr, s.db, query) + if err != nil { + return nil, nil, err + } + } + + schema := rdr.Schema() + go flight.StreamChunksFromReader(rdr, ch) + return schema, ch, nil +} + +func (s *SQLiteFlightSQLServer) GetFlightInfoXdbcTypeInfo(_ context.Context, _ flightsql.GetXdbcTypeInfo, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return s.flightInfoForCommand(desc, schema_ref.XdbcTypeInfo), nil +} + +func (s *SQLiteFlightSQLServer) DoGetXdbcTypeInfo(_ context.Context, cmd flightsql.GetXdbcTypeInfo) (*arrow.Schema, <-chan flight.StreamChunk, error) { + var batch arrow.Record + if cmd.GetDataType() == nil { + batch = GetTypeInfoResult(s.Alloc) + } else { + batch = GetFilteredTypeInfoResult(s.Alloc, *cmd.GetDataType()) + } + + ch := make(chan flight.StreamChunk, 1) + ch <- flight.StreamChunk{Data: batch} + close(ch) + return batch.Schema(), ch, nil +} + +func (s *SQLiteFlightSQLServer) GetFlightInfoTableTypes(_ context.Context, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return s.flightInfoForCommand(desc, schema_ref.TableTypes), nil +} + +func (s *SQLiteFlightSQLServer) DoGetTableTypes(ctx context.Context) (*arrow.Schema, <-chan flight.StreamChunk, error) { + query := "SELECT DISTINCT type AS table_type FROM sqlite_master" + return doGetQuery(ctx, s.Alloc, s.db, query, schema_ref.TableTypes) +} + +func (s *SQLiteFlightSQLServer) DoPutCommandStatementUpdate(ctx context.Context, cmd flightsql.StatementUpdate) (int64, error) { + res, err := s.db.ExecContext(ctx, cmd.GetQuery()) + if err != nil { + return 0, err + } + return res.RowsAffected() +} + +func (s 
*SQLiteFlightSQLServer) CreatePreparedStatement(ctx context.Context, req flightsql.ActionCreatePreparedStatementRequest) (result flightsql.ActionCreatePreparedStatementResult, err error) { + stmt, err := s.db.PrepareContext(ctx, req.GetQuery()) + if err != nil { + return result, err + } + + handle := genRandomString() + s.prepared.Store(string(handle), Statement{stmt: stmt}) + + result.Handle = handle + // no way to get the dataset or parameter schemas from sql.DB + return +} + +func (s *SQLiteFlightSQLServer) ClosePreparedStatement(ctx context.Context, request flightsql.ActionClosePreparedStatementRequest) error { + handle := request.GetPreparedStatementHandle() + if val, loaded := s.prepared.LoadAndDelete(string(handle)); loaded { + stmt := val.(Statement) + return stmt.stmt.Close() + } + + return status.Error(codes.InvalidArgument, "prepared statement not found") +} + +func (s *SQLiteFlightSQLServer) GetFlightInfoPreparedStatement(_ context.Context, cmd flightsql.PreparedStatementQuery, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + _, ok := s.prepared.Load(string(cmd.GetPreparedStatementHandle())) + if !ok { + return nil, status.Error(codes.InvalidArgument, "prepared statement not found") + } + + return &flight.FlightInfo{ + Endpoint: []*flight.FlightEndpoint{{Ticket: &flight.Ticket{Ticket: desc.Cmd}}}, + FlightDescriptor: desc, + TotalRecords: -1, + TotalBytes: -1, + }, nil +} + +func doGetQuery(ctx context.Context, mem memory.Allocator, db *sql.DB, query string, schema *arrow.Schema, args ...interface{}) (*arrow.Schema, <-chan flight.StreamChunk, error) { + rows, err := db.QueryContext(ctx, query, args...) + if err != nil { + return nil, nil, err + } + + var rdr *SqlBatchReader + if schema != nil { + rdr, err = NewSqlBatchReaderWithSchema(mem, schema, rows) + } else { + rdr, err = NewSqlBatchReader(mem, rows) + if err == nil { + schema = rdr.schema + } + } + + if err != nil { + return nil, nil, err + } + + ch := make(chan flight.StreamChunk) + go flight.StreamChunksFromReader(rdr, ch) + return schema, ch, nil +} + +func (s *SQLiteFlightSQLServer) DoGetPreparedStatement(ctx context.Context, cmd flightsql.PreparedStatementQuery) (*arrow.Schema, <-chan flight.StreamChunk, error) { + val, ok := s.prepared.Load(string(cmd.GetPreparedStatementHandle())) + if !ok { + return nil, nil, status.Error(codes.InvalidArgument, "prepared statement not found") + } + + stmt := val.(Statement) + rows, err := stmt.stmt.QueryContext(ctx, stmt.params...) 
+ if err != nil { + return nil, nil, err + } + + rdr, err := NewSqlBatchReader(s.Alloc, rows) + if err != nil { + return nil, nil, err + } + + schema := rdr.schema + ch := make(chan flight.StreamChunk) + go flight.StreamChunksFromReader(rdr, ch) + return schema, ch, nil +} + +func getParamsForStatement(rdr flight.MessageReader) (params []interface{}, err error) { + for rdr.Next() { + rec := rdr.Record() + + nrows := int(rec.NumRows()) + ncols := int(rec.NumCols()) + + if len(params) < int(ncols) { + params = make([]interface{}, ncols) + } + + for i := 0; i < nrows; i++ { + for c := 0; c < ncols; c++ { + col := rec.Column(c) + sc, err := scalar.GetScalar(col, i) + if err != nil { + return nil, err + } + if r, ok := sc.(scalar.Releasable); ok { + r.Release() + } + + switch v := sc.(*scalar.DenseUnion).Value.(type) { + case *scalar.Int64: + params[c] = v.Value + case *scalar.Float32: + params[c] = v.Value + case *scalar.Float64: + params[c] = v.Value + case *scalar.String: + params[c] = string(v.Value.Bytes()) + case *scalar.Binary: + params[c] = v.Value.Bytes() + default: + return nil, fmt.Errorf("unsupported type: %s", v) + } + } + } + } + + return params, rdr.Err() +} + +func (s *SQLiteFlightSQLServer) DoPutPreparedStatementQuery(_ context.Context, cmd flightsql.PreparedStatementQuery, rdr flight.MessageReader, _ flight.MetadataWriter) error { + val, ok := s.prepared.Load(string(cmd.GetPreparedStatementHandle())) + if !ok { + return status.Error(codes.InvalidArgument, "prepared statement not found") + } + + stmt := val.(Statement) + args, err := getParamsForStatement(rdr) + if err != nil { + return status.Errorf(codes.Internal, "error gathering parameters for prepared statement query: %s", err.Error()) + } + + stmt.params = args + s.prepared.Store(string(cmd.GetPreparedStatementHandle()), stmt) + return nil +} + +func (s *SQLiteFlightSQLServer) DoPutPreparedStatementUpdate(ctx context.Context, cmd flightsql.PreparedStatementUpdate, rdr flight.MessageReader) (int64, error) { + val, ok := s.prepared.Load(string(cmd.GetPreparedStatementHandle())) + if !ok { + return 0, status.Error(codes.InvalidArgument, "prepared statement not found") + } + + stmt := val.(Statement) + args, err := getParamsForStatement(rdr) + if err != nil { + return 0, status.Errorf(codes.Internal, "error gathering parameters for prepared statement: %s", err.Error()) + } + + stmt.params = args + result, err := stmt.stmt.ExecContext(ctx, args...) 
+ if err != nil { + return 0, err + } + + return result.RowsAffected() +} + +func (s *SQLiteFlightSQLServer) GetFlightInfoPrimaryKeys(_ context.Context, cmd flightsql.TableRef, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return s.flightInfoForCommand(desc, schema_ref.PrimaryKeys), nil +} + +func (s *SQLiteFlightSQLServer) DoGetPrimaryKeys(ctx context.Context, cmd flightsql.TableRef) (*arrow.Schema, <-chan flight.StreamChunk, error) { + // the field key_name can not be recovered by sqlite so it is + // being set to null following the same pattern for catalog name and schema_name + var b strings.Builder + + b.WriteString(` + SELECT null AS catalog_name, null AS schema_name, table_name, name AS column_name, pk AS key_sequence, null as key_name + FROM pragma_table_info(table_name) + JOIN (SELECT null AS catalog_name, null AS schema_name, name AS table_name, type AS table_type + FROM sqlite_master) where 1=1 AND pk !=0`) + + if cmd.Catalog != nil { + fmt.Fprintf(&b, " and catalog_name LIKE '%s'", *cmd.Catalog) + } + if cmd.DBSchema != nil { + fmt.Fprintf(&b, " and schema_name LIKE '%s'", *cmd.DBSchema) + } + + fmt.Fprintf(&b, " and table_name LIKE '%s'", cmd.Table) + + return doGetQuery(ctx, s.Alloc, s.db, b.String(), schema_ref.PrimaryKeys) +} + +func (s *SQLiteFlightSQLServer) GetFlightInfoImportedKeys(_ context.Context, _ flightsql.TableRef, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return s.flightInfoForCommand(desc, schema_ref.ImportedKeys), nil +} + +func (s *SQLiteFlightSQLServer) DoGetImportedKeys(ctx context.Context, ref flightsql.TableRef) (*arrow.Schema, <-chan flight.StreamChunk, error) { + filter := "fk_table_name = '" + ref.Table + "'" + if ref.Catalog != nil { + filter += " AND fk_catalog_name = '" + *ref.Catalog + "'" + } + if ref.DBSchema != nil { + filter += " AND fk_schema_name = '" + *ref.DBSchema + "'" + } + query := prepareQueryForGetKeys(filter) + return doGetQuery(ctx, s.Alloc, s.db, query, schema_ref.ImportedKeys) +} + +func (s *SQLiteFlightSQLServer) GetFlightInfoExportedKeys(_ context.Context, _ flightsql.TableRef, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return s.flightInfoForCommand(desc, schema_ref.ExportedKeys), nil +} + +func (s *SQLiteFlightSQLServer) DoGetExportedKeys(ctx context.Context, ref flightsql.TableRef) (*arrow.Schema, <-chan flight.StreamChunk, error) { + filter := "pk_table_name = '" + ref.Table + "'" + if ref.Catalog != nil { + filter += " AND pk_catalog_name = '" + *ref.Catalog + "'" + } + if ref.DBSchema != nil { + filter += " AND pk_schema_name = '" + *ref.DBSchema + "'" + } + query := prepareQueryForGetKeys(filter) + return doGetQuery(ctx, s.Alloc, s.db, query, schema_ref.ExportedKeys) +} + +func (s *SQLiteFlightSQLServer) GetFlightInfoCrossReference(_ context.Context, _ flightsql.CrossTableRef, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return s.flightInfoForCommand(desc, schema_ref.CrossReference), nil +} + +func (s *SQLiteFlightSQLServer) DoGetCrossReference(ctx context.Context, cmd flightsql.CrossTableRef) (*arrow.Schema, <-chan flight.StreamChunk, error) { + pkref := cmd.PKRef + filter := "pk_table_name = '" + pkref.Table + "'" + if pkref.Catalog != nil { + filter += " AND pk_catalog_name = '" + *pkref.Catalog + "'" + } + if pkref.DBSchema != nil { + filter += " AND pk_schema_name = '" + *pkref.DBSchema + "'" + } + + fkref := cmd.FKRef + filter += " AND fk_table_name = '" + fkref.Table + "'" + if fkref.Catalog != nil { + filter += " AND fk_catalog_name = '" + 
*fkref.Catalog + "'" + } + if fkref.DBSchema != nil { + filter += " AND fk_schema_name = '" + *fkref.DBSchema + "'" + } + query := prepareQueryForGetKeys(filter) + return doGetQuery(ctx, s.Alloc, s.db, query, schema_ref.ExportedKeys) +} diff --git a/go/arrow/flight/flightsql/example/sqlite_tables_schema_batch_reader.go b/go/arrow/flight/flightsql/example/sqlite_tables_schema_batch_reader.go new file mode 100644 index 0000000000000..a53e36828c4c0 --- /dev/null +++ b/go/arrow/flight/flightsql/example/sqlite_tables_schema_batch_reader.go @@ -0,0 +1,203 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build go1.18 +// +build go1.18 + +package example + +import ( + "context" + "database/sql" + "strings" + "sync/atomic" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/flight" + "github.com/apache/arrow/go/v10/arrow/flight/flightsql" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" + sqlite3 "modernc.org/sqlite/lib" +) + +type SqliteTablesSchemaBatchReader struct { + refCount int64 + + mem memory.Allocator + ctx context.Context + rdr array.RecordReader + stmt *sql.Stmt + schemaBldr *array.BinaryBuilder + record arrow.Record + err error +} + +func NewSqliteTablesSchemaBatchReader(ctx context.Context, mem memory.Allocator, rdr array.RecordReader, db *sql.DB, mainQuery string) (*SqliteTablesSchemaBatchReader, error) { + schemaQuery := `SELECT table_name, name, type, [notnull] + FROM pragma_table_info(table_name) + JOIN (` + mainQuery + `) WHERE table_name = ?` + + stmt, err := db.PrepareContext(ctx, schemaQuery) + if err != nil { + rdr.Release() + return nil, err + } + + return &SqliteTablesSchemaBatchReader{ + refCount: 1, + ctx: ctx, + rdr: rdr, + stmt: stmt, + mem: mem, + schemaBldr: array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary), + }, nil +} + +func (s *SqliteTablesSchemaBatchReader) Err() error { return s.err } + +func (s *SqliteTablesSchemaBatchReader) Retain() { atomic.AddInt64(&s.refCount, 1) } + +func (s *SqliteTablesSchemaBatchReader) Release() { + debug.Assert(atomic.LoadInt64(&s.refCount) > 0, "too many releases") + + if atomic.AddInt64(&s.refCount, -1) == 0 { + s.rdr.Release() + s.stmt.Close() + s.schemaBldr.Release() + if s.record != nil { + s.record.Release() + s.record = nil + } + } +} + +func (s *SqliteTablesSchemaBatchReader) Schema() *arrow.Schema { + fields := append(s.rdr.Schema().Fields(), + arrow.Field{Name: "table_schema", Type: arrow.BinaryTypes.Binary}) + return arrow.NewSchema(fields, nil) +} + +func (s *SqliteTablesSchemaBatchReader) Record() arrow.Record { return s.record } + +func getSqlTypeFromTypeName(sqltype string) int { + if sqltype == "" { + return sqlite3.SQLITE_NULL + 
} + + sqltype = strings.ToLower(sqltype) + + if strings.HasPrefix(sqltype, "varchar") || strings.HasPrefix(sqltype, "char") { + return sqlite3.SQLITE_TEXT + } + + switch sqltype { + case "int", "integer": + return sqlite3.SQLITE_INTEGER + case "real": + return sqlite3.SQLITE_FLOAT + case "blob": + return sqlite3.SQLITE_BLOB + case "text", "date": + return sqlite3.SQLITE_TEXT + default: + return sqlite3.SQLITE_NULL + } +} + +func getPrecisionFromCol(sqltype int) int { + switch sqltype { + case sqlite3.SQLITE_INTEGER: + return 10 + case sqlite3.SQLITE_FLOAT: + return 15 + } + return 0 +} + +func getColumnMetadata(bldr *flightsql.ColumnMetadataBuilder, sqltype int, table string) arrow.Metadata { + defer bldr.Clear() + + bldr.Scale(15).IsReadOnly(false).IsAutoIncrement(false) + if table != "" { + bldr.TableName(table) + } + switch sqltype { + case sqlite3.SQLITE_TEXT, sqlite3.SQLITE_BLOB: + default: + bldr.Precision(int32(getPrecisionFromCol(sqltype))) + } + + return bldr.Metadata() +} + +func (s *SqliteTablesSchemaBatchReader) Next() bool { + if s.record != nil { + s.record.Release() + s.record = nil + } + + if !s.rdr.Next() { + return false + } + + rec := s.rdr.Record() + tableNameArr := rec.Column(rec.Schema().FieldIndices("table_name")[0]).(*array.String) + + bldr := flightsql.NewColumnMetadataBuilder() + columnFields := make([]arrow.Field, 0) + for i := 0; i < tableNameArr.Len(); i++ { + table := tableNameArr.Value(i) + rows, err := s.stmt.QueryContext(s.ctx, table) + if err != nil { + s.err = err + return false + } + + var tableName, name, typ string + var nn int + for rows.Next() { + if err := rows.Scan(&tableName, &name, &typ, &nn); err != nil { + rows.Close() + s.err = err + return false + } + + columnFields = append(columnFields, arrow.Field{ + Name: name, + Type: getArrowTypeFromString(typ), + Nullable: nn == 1, + Metadata: getColumnMetadata(bldr, getSqlTypeFromTypeName(typ), tableName), + }) + } + + rows.Close() + if rows.Err() != nil { + s.err = rows.Err() + return false + } + val := flight.SerializeSchema(arrow.NewSchema(columnFields, nil), s.mem) + s.schemaBldr.Append(val) + + columnFields = columnFields[:0] + } + + schemaCol := s.schemaBldr.NewArray() + defer schemaCol.Release() + + s.record = array.NewRecord(s.Schema(), append(rec.Columns(), schemaCol), rec.NumRows()) + return true +} diff --git a/go/arrow/flight/flightsql/example/type_info.go b/go/arrow/flight/flightsql/example/type_info.go new file mode 100644 index 0000000000000..d9bdca21e02db --- /dev/null +++ b/go/arrow/flight/flightsql/example/type_info.go @@ -0,0 +1,118 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
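Each row emitted by SqliteTablesSchemaBatchReader carries the table's Arrow schema as IPC-serialized bytes in the appended table_schema column. A consumer-side sketch, assuming a record with the TablesWithIncludedSchema layout (table_name at column 2, table_schema last) and that flight.DeserializeSchema is the counterpart of the flight.SerializeSchema call used above:

package demo

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/flight"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

// printTableSchemas walks a record with the TablesWithIncludedSchema layout
// (as produced by the reader above, or by DoGetTables with include_schema set)
// and decodes the serialized schema bytes carried in the last column.
func printTableSchemas(rec arrow.Record) error {
	tableNames := rec.Column(2).(*array.String)                     // "table_name"
	schemaCol := rec.Column(int(rec.NumCols()) - 1).(*array.Binary) // "table_schema"
	for i := 0; i < schemaCol.Len(); i++ {
		schema, err := flight.DeserializeSchema(schemaCol.Value(i), memory.DefaultAllocator)
		if err != nil {
			return err
		}
		fmt.Println(tableNames.Value(i), "->", schema)
	}
	return nil
}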
+ +//go:build go1.18 +// +build go1.18 + +package example + +import ( + "strings" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/flight/flightsql/schema_ref" + "github.com/apache/arrow/go/v10/arrow/memory" +) + +func GetTypeInfoResult(mem memory.Allocator) arrow.Record { + typeNames, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, + strings.NewReader(`["bit", "tinyint", "bigint", "longvarbinary", + "varbinary", "text", "longvarchar", "char", + "integer", "smallint", "float", "double", + "numeric", "varchar", "date", "time", "timestamp"]`)) + defer typeNames.Release() + + dataType, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, + strings.NewReader(`[-7, -6, -5, -4, -3, -1, -1, 1, 4, 5, 6, 8, 8, 12, 91, 92, 93]`)) + defer dataType.Release() + + columnSize, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, + strings.NewReader(`[1, 3, 19, 65536, 255, 65536, 65536, 255, 9, 5, 7, 15, 15, 255, 10, 8, 32]`)) + defer columnSize.Release() + + literalPrefix, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, + strings.NewReader(`[null, null, null, null, null, "'", "'", "'", null, null, null, null, null, "'" ,"'", "'", "'"]`)) + defer literalPrefix.Release() + + literalSuffix, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, + strings.NewReader(`[null, null, null, null, null, "'", "'", "'", null, null, null, null, null, "'" ,"'", "'", "'"]`)) + defer literalSuffix.Release() + + createParams, _, _ := array.FromJSON(mem, arrow.ListOfField(arrow.Field{Name: "item", Type: arrow.BinaryTypes.String, Nullable: false}), + strings.NewReader(`[[], [], [], [], [], ["length"], ["length"], ["length"], [], [], [], [], [], ["length"], [], [], []]`)) + defer createParams.Release() + + nullable, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, + strings.NewReader(`[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]`)) + defer nullable.Release() + + // reference for creating a boolean() array with only zeros + zeroBoolArray, _, err := array.FromJSON(mem, arrow.FixedWidthTypes.Boolean, + strings.NewReader(`[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]`), array.WithUseNumber()) + if err != nil { + panic(err) + } + defer zeroBoolArray.Release() + caseSensitive := zeroBoolArray + + searchable, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, + strings.NewReader(`[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]`)) + defer searchable.Release() + + unsignedAttribute := zeroBoolArray + fixedPrecScale := zeroBoolArray + autoUniqueVal := zeroBoolArray + + localTypeName := typeNames + + zeroIntArray, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, + strings.NewReader(`[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]`)) + defer zeroIntArray.Release() + + minimalScale := zeroIntArray + maximumScale := zeroIntArray + sqlDataType := dataType + sqlDateTimeSub := zeroIntArray + numPrecRadix := zeroIntArray + intervalPrecision := zeroIntArray + + return array.NewRecord(schema_ref.XdbcTypeInfo, []arrow.Array{ + typeNames, dataType, columnSize, literalPrefix, literalSuffix, + createParams, nullable, caseSensitive, searchable, unsignedAttribute, + fixedPrecScale, autoUniqueVal, localTypeName, minimalScale, maximumScale, + sqlDataType, sqlDateTimeSub, numPrecRadix, intervalPrecision}, 17) +} + +func GetFilteredTypeInfoResult(mem memory.Allocator, filter int32) arrow.Record { + batch := GetTypeInfoResult(mem) + defer batch.Release() + + dataTypeVector := []int32{-7, -6, -5, -4, -3, -1, -1, 1, 4, 5, 
6, 8, 8, 12, 91, 92, 93} + start, end := -1, -1 + for i, v := range dataTypeVector { + if filter == v { + if start == -1 { + start = i + } + } else if start != -1 && end == -1 { + end = i + break + } + } + + return batch.NewSlice(int64(start), int64(end)) +} diff --git a/go/arrow/flight/flightsql/schema_ref/reference_schemas.go b/go/arrow/flight/flightsql/schema_ref/reference_schemas.go new file mode 100644 index 0000000000000..7a4a14064d540 --- /dev/null +++ b/go/arrow/flight/flightsql/schema_ref/reference_schemas.go @@ -0,0 +1,106 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package schema_ref contains the expected reference Schemas to be used +// by FlightSQL servers and clients. +package schema_ref + +import "github.com/apache/arrow/go/v10/arrow" + +var ( + Catalogs = arrow.NewSchema( + []arrow.Field{{Name: "catalog_name", Type: arrow.BinaryTypes.String}}, nil) + DBSchemas = arrow.NewSchema([]arrow.Field{ + {Name: "catalog_name", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "db_schema_name", Type: arrow.BinaryTypes.String}, + }, nil) + Tables = arrow.NewSchema([]arrow.Field{ + {Name: "catalog_name", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "db_schema_name", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "table_name", Type: arrow.BinaryTypes.String}, + {Name: "table_type", Type: arrow.BinaryTypes.String}, + }, nil) + TablesWithIncludedSchema = arrow.NewSchema([]arrow.Field{ + {Name: "catalog_name", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "db_schema_name", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "table_name", Type: arrow.BinaryTypes.String}, + {Name: "table_type", Type: arrow.BinaryTypes.String}, + {Name: "table_schema", Type: arrow.BinaryTypes.Binary}, + }, nil) + TableTypes = arrow.NewSchema([]arrow.Field{ + {Name: "table_type", Type: arrow.BinaryTypes.String}, + }, nil) + PrimaryKeys = arrow.NewSchema([]arrow.Field{ + {Name: "catalog_name", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "db_schema_name", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "table_name", Type: arrow.BinaryTypes.String}, + {Name: "column_name", Type: arrow.BinaryTypes.String}, + {Name: "key_sequence", Type: arrow.PrimitiveTypes.Int32}, + {Name: "key_name", Type: arrow.BinaryTypes.String, Nullable: true}, + }, nil) + ImportedExportedKeysAndCrossReference = arrow.NewSchema([]arrow.Field{ + {Name: "pk_catalog_name", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "pk_db_schema_name", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "pk_table_name", Type: arrow.BinaryTypes.String, Nullable: false}, + {Name: "pk_column_name", Type: arrow.BinaryTypes.String, Nullable: false}, + {Name: "fk_catalog_name", Type: arrow.BinaryTypes.String, 
Nullable: true}, + {Name: "fk_db_schema_name", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "fk_table_name", Type: arrow.BinaryTypes.String, Nullable: false}, + {Name: "fk_column_name", Type: arrow.BinaryTypes.String, Nullable: false}, + {Name: "key_sequence", Type: arrow.PrimitiveTypes.Int32, Nullable: false}, + {Name: "fk_key_name", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "pk_key_name", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "update_rule", Type: arrow.PrimitiveTypes.Uint8, Nullable: false}, + {Name: "delete_rule", Type: arrow.PrimitiveTypes.Uint8, Nullable: false}, + }, nil) + ImportedKeys = ImportedExportedKeysAndCrossReference + ExportedKeys = ImportedExportedKeysAndCrossReference + CrossReference = ImportedExportedKeysAndCrossReference + SqlInfo = arrow.NewSchema([]arrow.Field{ + {Name: "info_name", Type: arrow.PrimitiveTypes.Uint32}, + {Name: "value", Type: arrow.DenseUnionOf([]arrow.Field{ + {Name: "string_value", Type: arrow.BinaryTypes.String}, + {Name: "bool_value", Type: arrow.FixedWidthTypes.Boolean}, + {Name: "bigint_value", Type: arrow.PrimitiveTypes.Int64}, + {Name: "int32_bitmask", Type: arrow.PrimitiveTypes.Int32}, + {Name: "string_list", Type: arrow.ListOf(arrow.BinaryTypes.String)}, + {Name: "int32_to_int32_list_map", + Type: arrow.MapOf(arrow.PrimitiveTypes.Int32, + arrow.ListOf(arrow.PrimitiveTypes.Int32))}, + }, []arrow.UnionTypeCode{0, 1, 2, 3, 4, 5})}, + }, nil) + XdbcTypeInfo = arrow.NewSchema([]arrow.Field{ + {Name: "type_name", Type: arrow.BinaryTypes.String, Nullable: false}, + {Name: "data_type", Type: arrow.PrimitiveTypes.Int32, Nullable: false}, + {Name: "column_size", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, + {Name: "literal_prefix", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "literal_suffix", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "create_params", Type: arrow.ListOfField(arrow.Field{Name: "item", Type: arrow.BinaryTypes.String, Nullable: false}), Nullable: true}, + {Name: "nullable", Type: arrow.PrimitiveTypes.Int32, Nullable: false}, + {Name: "case_sensitive", Type: arrow.FixedWidthTypes.Boolean, Nullable: false}, + {Name: "searchable", Type: arrow.PrimitiveTypes.Int32, Nullable: false}, + {Name: "unsigned_attribute", Type: arrow.FixedWidthTypes.Boolean, Nullable: true}, + {Name: "fixed_prec_scale", Type: arrow.FixedWidthTypes.Boolean, Nullable: false}, + {Name: "auto_increment", Type: arrow.FixedWidthTypes.Boolean, Nullable: true}, + {Name: "local_type_name", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "minimum_scale", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, + {Name: "maximum_scale", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, + {Name: "sql_data_type", Type: arrow.PrimitiveTypes.Int32, Nullable: false}, + {Name: "datetime_subcode", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, + {Name: "num_prec_radix", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, + {Name: "interval_precision", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, + }, nil) +) diff --git a/go/arrow/flight/flightsql/server.go b/go/arrow/flight/flightsql/server.go new file mode 100644 index 0000000000000..8080df9e4bded --- /dev/null +++ b/go/arrow/flight/flightsql/server.go @@ -0,0 +1,782 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package flightsql + +import ( + "context" + "fmt" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/flight" + "github.com/apache/arrow/go/v10/arrow/flight/flightsql/schema_ref" + pb "github.com/apache/arrow/go/v10/arrow/flight/internal/flight" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/ipc" + "github.com/apache/arrow/go/v10/arrow/memory" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/anypb" +) + +// the following interfaces wrap the Protobuf commands to avoid +// exposing the Protobuf types themselves in the API. + +// StatementQuery represents a Sql Query +type StatementQuery interface { + GetQuery() string +} + +// StatementUpdate represents a SQL update query +type StatementUpdate interface { + GetQuery() string +} + +// StatementQueryTicket represents a request to execute a query +type StatementQueryTicket interface { + // GetStatementHandle returns the server-generated opaque + // identifier for the query + GetStatementHandle() []byte +} + +// PreparedStatementQuery represents a prepared query statement +type PreparedStatementQuery interface { + // GetPreparedStatementHandle returns the server-generated opaque + // identifier for the statement + GetPreparedStatementHandle() []byte +} + +// PreparedStatementUpdate represents a prepared update statement +type PreparedStatementUpdate interface { + // GetPreparedStatementHandle returns the server-generated opaque + // identifier for the statement + GetPreparedStatementHandle() []byte +} + +// ActionClosePreparedStatementRequest represents a request to close +// a prepared statement +type ActionClosePreparedStatementRequest interface { + // GetPreparedStatementHandle returns the server-generated opaque + // identifier for the statement + GetPreparedStatementHandle() []byte +} + +// ActionCreatePreparedStatementRequest represents a request to construct +// a new prepared statement +type ActionCreatePreparedStatementRequest interface { + GetQuery() string +} + +// ActionCreatePreparedStatementResult is the result of creating a new +// prepared statement, optionally including the dataset and parameter +// schemas. +type ActionCreatePreparedStatementResult struct { + Handle []byte + DatasetSchema *arrow.Schema + ParameterSchema *arrow.Schema +} + +type getXdbcTypeInfo struct { + *pb.CommandGetXdbcTypeInfo +} + +func (c *getXdbcTypeInfo) GetDataType() *int32 { return c.DataType } + +// GetXdbcTypeInfo represents a request for SQL Data Type information +type GetXdbcTypeInfo interface { + // GetDataType returns either nil (get for all types) + // or a specific SQL type ID to fetch information about. 
+ GetDataType() *int32 +} + +// GetSqlInfo represents a request for SQL Information +type GetSqlInfo interface { + // GetInfo returns a slice of SqlInfo ids to return information about + GetInfo() []uint32 +} + +type getDBSchemas struct { + *pb.CommandGetDbSchemas +} + +func (c *getDBSchemas) GetCatalog() *string { return c.Catalog } +func (c *getDBSchemas) GetDBSchemaFilterPattern() *string { return c.DbSchemaFilterPattern } + +// GetDBSchemas represents a request for a list of database schemas +type GetDBSchemas interface { + GetCatalog() *string + GetDBSchemaFilterPattern() *string +} + +type getTables struct { + *pb.CommandGetTables +} + +func (c *getTables) GetCatalog() *string { return c.Catalog } +func (c *getTables) GetDBSchemaFilterPattern() *string { return c.DbSchemaFilterPattern } +func (c *getTables) GetTableNameFilterPattern() *string { return c.TableNameFilterPattern } + +// GetTables represents a request to list the database's tables +type GetTables interface { + GetCatalog() *string + GetDBSchemaFilterPattern() *string + GetTableNameFilterPattern() *string + GetTableTypes() []string + GetIncludeSchema() bool +} + +// BaseServer must be embedded into any FlightSQL Server implementation +// and provides default implementations of all methods returning an +// unimplemented error if called. This allows consumers to gradually +// implement methods as they want instead of requiring all consumers to +// boilerplate the same "unimplemented" methods. +// +// The base implementation also contains handling for registering sql info +// and serving it up in response to GetSqlInfo requests. +type BaseServer struct { + sqlInfoToResult SqlInfoResultMap + // Alloc allows specifying a particular allocator to use for any + // allocations done by the base implementation. + // Will use memory.DefaultAllocator if nil + Alloc memory.Allocator +} + +func (BaseServer) mustEmbedBaseServer() {} + +// RegisterSqlInfo registers a specific result to return for a given sqlinfo +// id. The result must be one of the following types: string, bool, int64, +// int32, []string, or map[int32][]int32. +// +// Once registered, this value will be returned for any SqlInfo requests.
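+//
+// An illustrative sketch only (the SqlInfoFlightSqlServerName and
+// SqlInfoFlightSqlServerReadOnly ids are assumed for the example and are
+// not defined in this file):
+//
+//	srv := &BaseServer{}
+//	// register a string-valued info id; unsupported result types return an error
+//	if err := srv.RegisterSqlInfo(SqlInfoFlightSqlServerName, "example-server"); err != nil {
+//		// handle the invalid result type
+//	}
+//	_ = srv.RegisterSqlInfo(SqlInfoFlightSqlServerReadOnly, true)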
+func (b *BaseServer) RegisterSqlInfo(id SqlInfo, result interface{}) error { + if b.sqlInfoToResult == nil { + b.sqlInfoToResult = make(SqlInfoResultMap) + } + + switch result.(type) { + case string, bool, int64, int32, []string, map[int32][]int32: + b.sqlInfoToResult[uint32(id)] = result + default: + return fmt.Errorf("invalid sql info type '%T' registered for id: %d", result, id) + } + return nil +} + +func (BaseServer) GetFlightInfoStatement(context.Context, StatementQuery, *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return nil, status.Errorf(codes.Unimplemented, "GetFlightInfoStatement not implemented") +} + +func (BaseServer) GetSchemaStatement(context.Context, StatementQuery, *flight.FlightDescriptor) (*flight.SchemaResult, error) { + return nil, status.Errorf(codes.Unimplemented, "GetSchemaStatement not implemented") +} + +func (BaseServer) DoGetStatement(context.Context, StatementQueryTicket) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return nil, nil, status.Errorf(codes.Unimplemented, "DoGetStatement not implemented") +} + +func (BaseServer) GetFlightInfoPreparedStatement(context.Context, PreparedStatementQuery, *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return nil, status.Errorf(codes.Unimplemented, "GetFlightInfoPreparedStatement not implemented") +} + +func (BaseServer) GetSchemaPreparedStatement(context.Context, PreparedStatementQuery, *flight.FlightDescriptor) (*flight.SchemaResult, error) { + return nil, status.Errorf(codes.Unimplemented, "GetSchemaPreparedStatement not implemented") +} + +func (BaseServer) DoGetPreparedStatement(context.Context, PreparedStatementQuery) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return nil, nil, status.Errorf(codes.Unimplemented, "DoGetPreparedStatement not implemented") +} + +func (BaseServer) GetFlightInfoCatalogs(context.Context, *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return nil, status.Errorf(codes.Unimplemented, "GetFlightInfoCatalogs not implemented") +} + +func (BaseServer) DoGetCatalogs(context.Context) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return nil, nil, status.Errorf(codes.Unimplemented, "DoGetCatalogs not implemented") +} + +func (BaseServer) GetFlightInfoXdbcTypeInfo(context.Context, GetXdbcTypeInfo, *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return nil, status.Errorf(codes.Unimplemented, "GetFlightInfoXdbcTypeInfo not implemented") +} + +func (BaseServer) DoGetXdbcTypeInfo(context.Context, GetXdbcTypeInfo) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return nil, nil, status.Errorf(codes.Unimplemented, "DoGetXdbcTypeInfo not implemented") +} + +// GetFlightInfoSqlInfo is a base implementation of GetSqlInfo by using any +// registered sqlinfo (by calling RegisterSqlInfo). Will return an error +// if there is no sql info registered, otherwise a FlightInfo for retrieving +// the Sql info. 
+func (b *BaseServer) GetFlightInfoSqlInfo(_ context.Context, _ GetSqlInfo, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + if len(b.sqlInfoToResult) == 0 { + return nil, status.Error(codes.NotFound, "no sql information available") + } + + if b.Alloc == nil { + b.Alloc = memory.DefaultAllocator + } + + return &flight.FlightInfo{ + Endpoint: []*flight.FlightEndpoint{{Ticket: &flight.Ticket{Ticket: desc.Cmd}}}, + FlightDescriptor: desc, + TotalRecords: -1, + TotalBytes: -1, + Schema: flight.SerializeSchema(schema_ref.SqlInfo, b.Alloc), + }, nil +} + +// DoGetSqlInfo returns a flight stream containing the list of sqlinfo results +func (b *BaseServer) DoGetSqlInfo(_ context.Context, cmd GetSqlInfo) (*arrow.Schema, <-chan flight.StreamChunk, error) { + if b.Alloc == nil { + b.Alloc = memory.DefaultAllocator + } + + bldr := array.NewRecordBuilder(b.Alloc, schema_ref.SqlInfo) + defer bldr.Release() + + nameFieldBldr := bldr.Field(0).(*array.Uint32Builder) + valFieldBldr := bldr.Field(1).(*array.DenseUnionBuilder) + + // doesn't take ownership, no calls to retain. so we don't need + // extra releases. + sqlInfoResultBldr := newSqlInfoResultBuilder(valFieldBldr) + + // populate both the nameFieldBldr and the values for each + // element on command.info. + // valueFieldBldr is populated depending on the data type + // since it's a dense union. The population for each + // data type is handled by the sqlInfoResultBuilder. + for _, info := range cmd.GetInfo() { + val, ok := b.sqlInfoToResult[info] + if !ok { + return nil, nil, status.Errorf(codes.NotFound, "no information for sql info number %d", info) + } + nameFieldBldr.Append(info) + sqlInfoResultBldr.Append(val) + } + + batch := bldr.NewRecord() + defer batch.Release() + debug.Assert(int(batch.NumRows()) == len(cmd.GetInfo()), "too many rows added to SqlInfo result") + + ch := make(chan flight.StreamChunk) + rdr, err := array.NewRecordReader(schema_ref.SqlInfo, []arrow.Record{batch}) + if err != nil { + return nil, nil, status.Errorf(codes.Internal, "error producing record response: %s", err.Error()) + } + + // StreamChunksFromReader will call release on the reader when done + go flight.StreamChunksFromReader(rdr, ch) + return schema_ref.SqlInfo, ch, nil +} + +func (BaseServer) GetFlightInfoSchemas(context.Context, GetDBSchemas, *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return nil, status.Errorf(codes.Unimplemented, "GetFlightInfoSchemas not implemented") +} + +func (BaseServer) DoGetDBSchemas(context.Context, GetDBSchemas) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return nil, nil, status.Errorf(codes.Unimplemented, "DoGetDBSchemas not implemented") +} + +func (BaseServer) GetFlightInfoTables(context.Context, GetTables, *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return nil, status.Errorf(codes.Unimplemented, "GetFlightInfoTables not implemented") +} + +func (BaseServer) DoGetTables(context.Context, GetTables) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return nil, nil, status.Errorf(codes.Unimplemented, "DoGetTables not implemented") +} + +func (BaseServer) GetFlightInfoTableTypes(context.Context, *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return nil, status.Errorf(codes.Unimplemented, "GetFlightInfoTableTypes not implemented") +} + +func (BaseServer) DoGetTableTypes(context.Context) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return nil, nil, status.Errorf(codes.Unimplemented, "DoGetTableTypes not implemented") +} + +func (BaseServer) 
GetFlightInfoPrimaryKeys(context.Context, TableRef, *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return nil, status.Error(codes.Unimplemented, "GetFlightInfoPrimaryKeys not implemented") +} + +func (BaseServer) DoGetPrimaryKeys(context.Context, TableRef) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return nil, nil, status.Errorf(codes.Unimplemented, "DoGetPrimaryKeys not implemented") +} + +func (BaseServer) GetFlightInfoExportedKeys(context.Context, TableRef, *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return nil, status.Error(codes.Unimplemented, "GetFlightInfoExportedKeys not implemented") +} + +func (BaseServer) DoGetExportedKeys(context.Context, TableRef) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return nil, nil, status.Errorf(codes.Unimplemented, "DoGetExportedKeys not implemented") +} + +func (BaseServer) GetFlightInfoImportedKeys(context.Context, TableRef, *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return nil, status.Error(codes.Unimplemented, "GetFlightInfoImportedKeys not implemented") +} + +func (BaseServer) DoGetImportedKeys(context.Context, TableRef) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return nil, nil, status.Errorf(codes.Unimplemented, "DoGetImportedKeys not implemented") +} + +func (BaseServer) GetFlightInfoCrossReference(context.Context, CrossTableRef, *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return nil, status.Error(codes.Unimplemented, "GetFlightInfoCrossReference not implemented") +} + +func (BaseServer) DoGetCrossReference(context.Context, CrossTableRef) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return nil, nil, status.Errorf(codes.Unimplemented, "DoGetCrossReference not implemented") +} + +func (BaseServer) CreatePreparedStatement(context.Context, ActionCreatePreparedStatementRequest) (res ActionCreatePreparedStatementResult, err error) { + return res, status.Error(codes.Unimplemented, "CreatePreparedStatement not implemented") +} + +func (BaseServer) ClosePreparedStatement(context.Context, ActionClosePreparedStatementRequest) error { + return status.Error(codes.Unimplemented, "ClosePreparedStatement not implemented") +} + +func (BaseServer) DoPutCommandStatementUpdate(context.Context, StatementUpdate) (int64, error) { + return 0, status.Error(codes.Unimplemented, "DoPutCommandStatementUpdate not implemented") +} +func (BaseServer) DoPutPreparedStatementQuery(context.Context, PreparedStatementQuery, flight.MessageReader, flight.MetadataWriter) error { + return status.Error(codes.Unimplemented, "DoPutPreparedStatementQuery not implemented") +} + +func (BaseServer) DoPutPreparedStatementUpdate(context.Context, PreparedStatementUpdate, flight.MessageReader) (int64, error) { + return 0, status.Error(codes.Unimplemented, "DoPutPreparedStatementUpdate not implemented") +} + +// Server is the required interface for a FlightSQL server. It is implemented by +// BaseServer which must be embedded in any implementation. The default +// implementation by BaseServer for each of these (except GetSqlInfo) simply +// returns an Unimplemented gRPC status error. + +// GetFlightInfo* methods should return the FlightInfo object representing where +// to retrieve the results for a given request. +// +// DoGet* methods should return the Schema of the resulting stream along with +// a channel to retrieve stream chunks (each chunk is a record batch and optionally +// a descriptor and app metadata). The channel will be read from until it +// closes, sending each chunk on the stream.
Since the channel is returned +// from the method, it should be populated within a goroutine to ensure +// there are no deadlocks. +type Server interface { + // GetFlightInfoStatement returns a FlightInfo for executing the requested sql query + GetFlightInfoStatement(context.Context, StatementQuery, *flight.FlightDescriptor) (*flight.FlightInfo, error) + // GetSchemaStatement returns the schema of the result set of the requested sql query + GetSchemaStatement(context.Context, StatementQuery, *flight.FlightDescriptor) (*flight.SchemaResult, error) + // DoGetStatement returns a stream containing the query results for the + // requested statement handle that was populated by GetFlightInfoStatement + DoGetStatement(context.Context, StatementQueryTicket) (*arrow.Schema, <-chan flight.StreamChunk, error) + // GetFlightInfoPreparedStatement returns a FlightInfo for executing an already + // prepared statement with the provided statement handle. + GetFlightInfoPreparedStatement(context.Context, PreparedStatementQuery, *flight.FlightDescriptor) (*flight.FlightInfo, error) + // GetSchemaPreparedStatement returns the schema of the result set of executing an already + // prepared statement with the provided statement handle. + GetSchemaPreparedStatement(context.Context, PreparedStatementQuery, *flight.FlightDescriptor) (*flight.SchemaResult, error) + // DoGetPreparedStatement returns a stream containing the results from executing + // a prepared statement query with the provided statement handle. + DoGetPreparedStatement(context.Context, PreparedStatementQuery) (*arrow.Schema, <-chan flight.StreamChunk, error) + // GetFlightInfoCatalogs returns a FlightInfo for the listing of all catalogs + GetFlightInfoCatalogs(context.Context, *flight.FlightDescriptor) (*flight.FlightInfo, error) + // DoGetCatalogs returns the stream containing the list of catalogs + DoGetCatalogs(context.Context) (*arrow.Schema, <-chan flight.StreamChunk, error) + // GetFlightInfoXdbcTypeInfo returns a FlightInfo for retrieving data type info + GetFlightInfoXdbcTypeInfo(context.Context, GetXdbcTypeInfo, *flight.FlightDescriptor) (*flight.FlightInfo, error) + // DoGetXdbcTypeInfo returns a stream containing the information about the + // requested supported datatypes + DoGetXdbcTypeInfo(context.Context, GetXdbcTypeInfo) (*arrow.Schema, <-chan flight.StreamChunk, error) + // GetFlightInfoSqlInfo returns a FlightInfo for retrieving SqlInfo from the server + GetFlightInfoSqlInfo(context.Context, GetSqlInfo, *flight.FlightDescriptor) (*flight.FlightInfo, error) + // DoGetSqlInfo returns a stream containing the list of SqlInfo results + DoGetSqlInfo(context.Context, GetSqlInfo) (*arrow.Schema, <-chan flight.StreamChunk, error) + // GetFlightInfoSchemas returns a FlightInfo for requesting a list of schemas + GetFlightInfoSchemas(context.Context, GetDBSchemas, *flight.FlightDescriptor) (*flight.FlightInfo, error) + // DoGetDBSchemas returns a stream containing the list of schemas + DoGetDBSchemas(context.Context, GetDBSchemas) (*arrow.Schema, <-chan flight.StreamChunk, error) + // GetFlightInfoTables returns a FlightInfo for listing the tables available + GetFlightInfoTables(context.Context, GetTables, *flight.FlightDescriptor) (*flight.FlightInfo, error) + // DoGetTables returns a stream containing the list of tables + DoGetTables(context.Context, GetTables) (*arrow.Schema, <-chan flight.StreamChunk, error) + // GetFlightInfoTableTypes returns a FlightInfo for retrieving a list + // of table types supported +
GetFlightInfoTableTypes(context.Context, *flight.FlightDescriptor) (*flight.FlightInfo, error) + // DoGetTableTypes returns a stream containing the data related to the table types + DoGetTableTypes(context.Context) (*arrow.Schema, <-chan flight.StreamChunk, error) + // GetFlightInfoPrimaryKeys returns a FlightInfo for extracting information about primary keys + GetFlightInfoPrimaryKeys(context.Context, TableRef, *flight.FlightDescriptor) (*flight.FlightInfo, error) + // DoGetPrimaryKeys returns a stream containing the data related to primary keys + DoGetPrimaryKeys(context.Context, TableRef) (*arrow.Schema, <-chan flight.StreamChunk, error) + // GetFlightInfoExportedKeys returns a FlightInfo for extracting information about foreign keys + GetFlightInfoExportedKeys(context.Context, TableRef, *flight.FlightDescriptor) (*flight.FlightInfo, error) + // DoGetExportedKeys returns a stream containing the data related to foreign keys + DoGetExportedKeys(context.Context, TableRef) (*arrow.Schema, <-chan flight.StreamChunk, error) + // GetFlightInfoImportedKeys returns a FlightInfo for extracting information about imported keys + GetFlightInfoImportedKeys(context.Context, TableRef, *flight.FlightDescriptor) (*flight.FlightInfo, error) + // DoGetImportedKeys returns a stream containing the data related to imported keys + DoGetImportedKeys(context.Context, TableRef) (*arrow.Schema, <-chan flight.StreamChunk, error) + // GetFlightInfoCrossReference returns a FlightInfo for extracting data related + // to primary and foreign keys + GetFlightInfoCrossReference(context.Context, CrossTableRef, *flight.FlightDescriptor) (*flight.FlightInfo, error) + // DoGetCrossReference returns a stream of data related to foreign and primary keys + DoGetCrossReference(context.Context, CrossTableRef) (*arrow.Schema, <-chan flight.StreamChunk, error) + // DoPutCommandStatementUpdate executes a sql update statement and returns + // the number of affected rows + DoPutCommandStatementUpdate(context.Context, StatementUpdate) (int64, error) + // CreatePreparedStatement constructs a prepared statement from a sql query + // and returns an opaque statement handle for use. + CreatePreparedStatement(context.Context, ActionCreatePreparedStatementRequest) (ActionCreatePreparedStatementResult, error) + // ClosePreparedStatement closes the prepared statement identified by the requested + // opaque statement handle. + ClosePreparedStatement(context.Context, ActionClosePreparedStatementRequest) error + // DoPutPreparedStatementQuery binds parameters to a given prepared statement + // identified by the provided statement handle. + // + // The provided MessageReader is a stream of record batches with optional + // app metadata and flight descriptors to represent the values to bind + // to the parameters. + // + // Currently anything written to the writer will be ignored. It is in the + // interface for potential future enhancements to avoid having to change + // the interface in the future. + DoPutPreparedStatementQuery(context.Context, PreparedStatementQuery, flight.MessageReader, flight.MetadataWriter) error + // DoPutPreparedStatementUpdate executes an update SQL Prepared statement + // for the specified statement handle. The reader allows providing a sequence + // of uploaded record batches to bind the parameters to. Returns the number + // of affected records. 
+ DoPutPreparedStatementUpdate(context.Context, PreparedStatementUpdate, flight.MessageReader) (int64, error) + + mustEmbedBaseServer() +} + +// NewFlightServer constructs a FlightRPC server from the provided +// FlightSQL Server so that it can be passed to RegisterFlightService. +func NewFlightServer(srv Server) flight.FlightServer { + return &flightSqlServer{srv: srv, mem: memory.DefaultAllocator} +} + +// NewFlightServerWithAllocator constructs a FlightRPC server from +// the provided FlightSQL Server so that it can be passed to +// RegisterFlightService, setting the provided allocator into the server +// for use with any allocations necessary by the routing. +// +// Will default to memory.DefaultAllocator if mem is nil +func NewFlightServerWithAllocator(srv Server, mem memory.Allocator) flight.FlightServer { + if mem == nil { + mem = memory.DefaultAllocator + } + return &flightSqlServer{srv: srv, mem: mem} +} + +// flightSqlServer is a wrapper around a FlightSQL server interface to +// perform routing from FlightRPC to FlightSQL. +type flightSqlServer struct { + flight.BaseFlightServer + mem memory.Allocator + srv Server +} + +func (f *flightSqlServer) GetFlightInfo(ctx context.Context, request *flight.FlightDescriptor) (*flight.FlightInfo, error) { + var ( + anycmd anypb.Any + cmd proto.Message + err error + ) + if err = proto.Unmarshal(request.Cmd, &anycmd); err != nil { + return nil, status.Errorf(codes.InvalidArgument, "unable to parse command: %s", err.Error()) + } + + if cmd, err = anycmd.UnmarshalNew(); err != nil { + return nil, status.Errorf(codes.InvalidArgument, "could not unmarshal Any to a command type: %s", err.Error()) + } + + switch cmd := cmd.(type) { + case *pb.CommandStatementQuery: + return f.srv.GetFlightInfoStatement(ctx, cmd, request) + case *pb.CommandPreparedStatementQuery: + return f.srv.GetFlightInfoPreparedStatement(ctx, cmd, request) + case *pb.CommandGetCatalogs: + return f.srv.GetFlightInfoCatalogs(ctx, request) + case *pb.CommandGetDbSchemas: + return f.srv.GetFlightInfoSchemas(ctx, &getDBSchemas{cmd}, request) + case *pb.CommandGetTables: + return f.srv.GetFlightInfoTables(ctx, &getTables{cmd}, request) + case *pb.CommandGetTableTypes: + return f.srv.GetFlightInfoTableTypes(ctx, request) + case *pb.CommandGetXdbcTypeInfo: + return f.srv.GetFlightInfoXdbcTypeInfo(ctx, &getXdbcTypeInfo{cmd}, request) + case *pb.CommandGetSqlInfo: + return f.srv.GetFlightInfoSqlInfo(ctx, cmd, request) + case *pb.CommandGetPrimaryKeys: + return f.srv.GetFlightInfoPrimaryKeys(ctx, pkToTableRef(cmd), request) + case *pb.CommandGetExportedKeys: + return f.srv.GetFlightInfoExportedKeys(ctx, exkToTableRef(cmd), request) + case *pb.CommandGetImportedKeys: + return f.srv.GetFlightInfoImportedKeys(ctx, impkToTableRef(cmd), request) + case *pb.CommandGetCrossReference: + return f.srv.GetFlightInfoCrossReference(ctx, toCrossTableRef(cmd), request) + } + + return nil, status.Error(codes.InvalidArgument, "requested command is invalid") +} + +func (f *flightSqlServer) GetSchema(ctx context.Context, request *flight.FlightDescriptor) (*flight.SchemaResult, error) { + var ( + anycmd anypb.Any + cmd proto.Message + err error + ) + if err = proto.Unmarshal(request.Cmd, &anycmd); err != nil { + return nil, status.Errorf(codes.InvalidArgument, "unable to parse command: %s", err.Error()) + } + + if cmd, err = anycmd.UnmarshalNew(); err != nil { + return nil, status.Errorf(codes.InvalidArgument, "could not unmarshal Any to a command type: %s", err.Error()) + } + + switch cmd := cmd.(type) { + 
case *pb.CommandStatementQuery: + return f.srv.GetSchemaStatement(ctx, cmd, request) + case *pb.CommandPreparedStatementQuery: + return f.srv.GetSchemaPreparedStatement(ctx, cmd, request) + case *pb.CommandGetCatalogs: + return &flight.SchemaResult{Schema: flight.SerializeSchema(schema_ref.Catalogs, f.mem)}, nil + case *pb.CommandGetDbSchemas: + return &flight.SchemaResult{Schema: flight.SerializeSchema(schema_ref.DBSchemas, f.mem)}, nil + case *pb.CommandGetTables: + if cmd.GetIncludeSchema() { + return &flight.SchemaResult{Schema: flight.SerializeSchema(schema_ref.TablesWithIncludedSchema, f.mem)}, nil + } + return &flight.SchemaResult{Schema: flight.SerializeSchema(schema_ref.Tables, f.mem)}, nil + case *pb.CommandGetTableTypes: + return &flight.SchemaResult{Schema: flight.SerializeSchema(schema_ref.TableTypes, f.mem)}, nil + case *pb.CommandGetXdbcTypeInfo: + return &flight.SchemaResult{Schema: flight.SerializeSchema(schema_ref.XdbcTypeInfo, f.mem)}, nil + case *pb.CommandGetSqlInfo: + return &flight.SchemaResult{Schema: flight.SerializeSchema(schema_ref.SqlInfo, f.mem)}, nil + case *pb.CommandGetPrimaryKeys: + return &flight.SchemaResult{Schema: flight.SerializeSchema(schema_ref.PrimaryKeys, f.mem)}, nil + case *pb.CommandGetExportedKeys: + return &flight.SchemaResult{Schema: flight.SerializeSchema(schema_ref.ExportedKeys, f.mem)}, nil + case *pb.CommandGetImportedKeys: + return &flight.SchemaResult{Schema: flight.SerializeSchema(schema_ref.ImportedKeys, f.mem)}, nil + case *pb.CommandGetCrossReference: + return &flight.SchemaResult{Schema: flight.SerializeSchema(schema_ref.CrossReference, f.mem)}, nil + } + + return nil, status.Errorf(codes.InvalidArgument, "requested command is invalid: %s", anycmd.GetTypeUrl()) +} + +func (f *flightSqlServer) DoGet(request *flight.Ticket, stream flight.FlightService_DoGetServer) (err error) { + var ( + anycmd anypb.Any + cmd proto.Message + cc <-chan flight.StreamChunk + sc *arrow.Schema + ) + if err = proto.Unmarshal(request.Ticket, &anycmd); err != nil { + return status.Errorf(codes.InvalidArgument, "unable to parse ticket: %s", err.Error()) + } + + if cmd, err = anycmd.UnmarshalNew(); err != nil { + return status.Errorf(codes.InvalidArgument, "unable to unmarshal proto.Any: %s", err.Error()) + } + + switch cmd := cmd.(type) { + case *pb.TicketStatementQuery: + sc, cc, err = f.srv.DoGetStatement(stream.Context(), cmd) + case *pb.CommandPreparedStatementQuery: + sc, cc, err = f.srv.DoGetPreparedStatement(stream.Context(), cmd) + case *pb.CommandGetCatalogs: + sc, cc, err = f.srv.DoGetCatalogs(stream.Context()) + case *pb.CommandGetDbSchemas: + sc, cc, err = f.srv.DoGetDBSchemas(stream.Context(), &getDBSchemas{cmd}) + case *pb.CommandGetTables: + sc, cc, err = f.srv.DoGetTables(stream.Context(), &getTables{cmd}) + case *pb.CommandGetTableTypes: + sc, cc, err = f.srv.DoGetTableTypes(stream.Context()) + case *pb.CommandGetXdbcTypeInfo: + sc, cc, err = f.srv.DoGetXdbcTypeInfo(stream.Context(), &getXdbcTypeInfo{cmd}) + case *pb.CommandGetSqlInfo: + sc, cc, err = f.srv.DoGetSqlInfo(stream.Context(), cmd) + case *pb.CommandGetPrimaryKeys: + sc, cc, err = f.srv.DoGetPrimaryKeys(stream.Context(), pkToTableRef(cmd)) + case *pb.CommandGetExportedKeys: + sc, cc, err = f.srv.DoGetExportedKeys(stream.Context(), exkToTableRef(cmd)) + case *pb.CommandGetImportedKeys: + sc, cc, err = f.srv.DoGetImportedKeys(stream.Context(), impkToTableRef(cmd)) + case *pb.CommandGetCrossReference: + sc, cc, err = f.srv.DoGetCrossReference(stream.Context(), toCrossTableRef(cmd)) + 
default: + return status.Error(codes.InvalidArgument, "requested command is invalid") + } + + if err != nil { + return err + } + + wr := flight.NewRecordWriter(stream, ipc.WithSchema(sc)) + defer wr.Close() + + for chunk := range cc { + if chunk.Err != nil { + return chunk.Err + } + + wr.SetFlightDescriptor(chunk.Desc) + if err = wr.WriteWithAppMetadata(chunk.Data, chunk.AppMetadata); err != nil { + return err + } + chunk.Data.Release() + } + + return err +} + +type putMetadataWriter struct { + stream flight.FlightService_DoPutServer +} + +func (p *putMetadataWriter) WriteMetadata(appMetadata []byte) error { + return p.stream.Send(&flight.PutResult{AppMetadata: appMetadata}) +} + +func (f *flightSqlServer) DoPut(stream flight.FlightService_DoPutServer) error { + rdr, err := flight.NewRecordReader(stream, ipc.WithAllocator(f.mem), ipc.WithDelayReadSchema(true)) + if err != nil { + return status.Errorf(codes.InvalidArgument, "failed to read input stream: %s", err.Error()) + } + defer rdr.Release() + + // flight descriptor should have come with the schema message + request := rdr.LatestFlightDescriptor() + + var ( + anycmd anypb.Any + cmd proto.Message + ) + if err = proto.Unmarshal(request.Cmd, &anycmd); err != nil { + return status.Errorf(codes.InvalidArgument, "unable to parse command: %s", err.Error()) + } + + if cmd, err = anycmd.UnmarshalNew(); err != nil { + return status.Errorf(codes.InvalidArgument, "could not unmarshal google.protobuf.Any: %s", err.Error()) + } + + switch cmd := cmd.(type) { + case *pb.CommandStatementUpdate: + recordCount, err := f.srv.DoPutCommandStatementUpdate(stream.Context(), cmd) + if err != nil { + return err + } + + result := pb.DoPutUpdateResult{RecordCount: recordCount} + out := &flight.PutResult{} + if out.AppMetadata, err = proto.Marshal(&result); err != nil { + return status.Errorf(codes.Internal, "failed to marshal PutResult: %s", err.Error()) + } + return stream.Send(out) + case *pb.CommandPreparedStatementQuery: + return f.srv.DoPutPreparedStatementQuery(stream.Context(), cmd, rdr, &putMetadataWriter{stream}) + case *pb.CommandPreparedStatementUpdate: + recordCount, err := f.srv.DoPutPreparedStatementUpdate(stream.Context(), cmd, rdr) + if err != nil { + return err + } + + result := pb.DoPutUpdateResult{RecordCount: recordCount} + out := &flight.PutResult{} + if out.AppMetadata, err = proto.Marshal(&result); err != nil { + return status.Errorf(codes.Internal, "failed to marshal PutResult: %s", err.Error()) + } + return stream.Send(out) + default: + return status.Error(codes.InvalidArgument, "the defined request is invalid") + } +} + +func (f *flightSqlServer) ListActions(_ *flight.Empty, stream flight.FlightService_ListActionsServer) error { + actions := []string{CreatePreparedStatementActionType, ClosePreparedStatementActionType} + + for _, a := range actions { + if err := stream.Send(&flight.ActionType{Type: a}); err != nil { + return err + } + } + return nil +} + +func (f *flightSqlServer) DoAction(cmd *flight.Action, stream flight.FlightService_DoActionServer) error { + var anycmd anypb.Any + + switch cmd.Type { + case CreatePreparedStatementActionType: + if err := proto.Unmarshal(cmd.Body, &anycmd); err != nil { + return status.Errorf(codes.InvalidArgument, "unable to parse command: %s", err.Error()) + } + + var ( + request pb.ActionCreatePreparedStatementRequest + result pb.ActionCreatePreparedStatementResult + ret pb.Result + ) + if err := anycmd.UnmarshalTo(&request); err != nil { + return status.Errorf(codes.InvalidArgument, "unable to unmarshal
google.protobuf.Any: %s", err.Error()) + } + + output, err := f.srv.CreatePreparedStatement(stream.Context(), &request) + if err != nil { + return err + } + + result.PreparedStatementHandle = output.Handle + if output.DatasetSchema != nil { + result.DatasetSchema = flight.SerializeSchema(output.DatasetSchema, f.mem) + } + if output.ParameterSchema != nil { + result.ParameterSchema = flight.SerializeSchema(output.ParameterSchema, f.mem) + } + + if err := anycmd.MarshalFrom(&result); err != nil { + return status.Errorf(codes.Internal, "unable to marshal final response: %s", err.Error()) + } + + if ret.Body, err = proto.Marshal(&anycmd); err != nil { + return status.Errorf(codes.Internal, "unable to marshal result: %s", err.Error()) + } + return stream.Send(&ret) + case ClosePreparedStatementActionType: + if err := proto.Unmarshal(cmd.Body, &anycmd); err != nil { + return status.Errorf(codes.InvalidArgument, "unable to parse command: %s", err.Error()) + } + + var request pb.ActionClosePreparedStatementRequest + if err := anycmd.UnmarshalTo(&request); err != nil { + return status.Errorf(codes.InvalidArgument, "unable to unmarshal google.protobuf.Any: %s", err.Error()) + } + + if err := f.srv.ClosePreparedStatement(stream.Context(), &request); err != nil { + return err + } + + return stream.Send(&pb.Result{}) + default: + return status.Error(codes.InvalidArgument, "the defined request is invalid.") + } +} + +var ( + _ Server = (*BaseServer)(nil) +) diff --git a/go/arrow/flight/flightsql/server_test.go b/go/arrow/flight/flightsql/server_test.go new file mode 100644 index 0000000000000..ece7754bbbdb2 --- /dev/null +++ b/go/arrow/flight/flightsql/server_test.go @@ -0,0 +1,212 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package flightsql_test + +import ( + "context" + "strings" + "testing" + + "github.com/apache/arrow/go/v10/arrow/flight" + "github.com/apache/arrow/go/v10/arrow/flight/flightsql" + pb "github.com/apache/arrow/go/v10/arrow/flight/internal/flight" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/stretchr/testify/suite" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/anypb" +) + +var dialOpts = []grpc.DialOption{grpc.WithTransportCredentials(insecure.NewCredentials())} + +type UnimplementedFlightSqlServerSuite struct { + suite.Suite + + s flight.Server + cl *flightsql.Client +} + +func (s *UnimplementedFlightSqlServerSuite) SetupSuite() { + s.s = flight.NewServerWithMiddleware(nil) + srv := flightsql.NewFlightServer(&flightsql.BaseServer{}) + s.s.RegisterFlightService(srv) + s.s.Init("localhost:0") + + go s.s.Serve() +} + +func (s *UnimplementedFlightSqlServerSuite) SetupTest() { + cl, err := flightsql.NewClient(s.s.Addr().String(), nil, nil, dialOpts...) + s.Require().NoError(err) + s.cl = cl +} + +func (s *UnimplementedFlightSqlServerSuite) TearDownTest() { + s.Require().NoError(s.cl.Close()) + s.cl = nil +} + +func (s *UnimplementedFlightSqlServerSuite) TearDownSuite() { + s.s.Shutdown() +} + +// the following test functions verify that the default base server will +// correctly route requests to the appropriate interface methods based on +// the descriptor types for DoPut/DoGet/DoAction + +func (s *UnimplementedFlightSqlServerSuite) TestExecute() { + info, err := s.cl.Execute(context.TODO(), "SELECT * FROM IRRELEVANT") + st, ok := status.FromError(err) + s.True(ok) + s.Equal(codes.Unimplemented, st.Code()) + s.Equal(st.Message(), "GetFlightInfoStatement not implemented") + s.Nil(info) +} + +func (s *UnimplementedFlightSqlServerSuite) TestGetTables() { + info, err := s.cl.GetTables(context.TODO(), &flightsql.GetTablesOpts{}) + st, ok := status.FromError(err) + s.True(ok) + s.Equal(codes.Unimplemented, st.Code()) + s.Equal(st.Message(), "GetFlightInfoTables not implemented") + s.Nil(info) +} + +func (s *UnimplementedFlightSqlServerSuite) TestGetTableTypes() { + info, err := s.cl.GetTableTypes(context.TODO()) + st, ok := status.FromError(err) + s.True(ok) + s.Equal(codes.Unimplemented, st.Code()) + s.Equal(st.Message(), "GetFlightInfoTableTypes not implemented") + s.Nil(info) +} + +func (s *UnimplementedFlightSqlServerSuite) TestGetPrimaryKeys() { + info, err := s.cl.GetPrimaryKeys(context.TODO(), flightsql.TableRef{}) + st, ok := status.FromError(err) + s.True(ok) + s.Equal(codes.Unimplemented, st.Code()) + s.Equal(st.Message(), "GetFlightInfoPrimaryKeys not implemented") + s.Nil(info) +} + +func (s *UnimplementedFlightSqlServerSuite) TestGetExportedKeys() { + info, err := s.cl.GetExportedKeys(context.TODO(), flightsql.TableRef{}) + st, ok := status.FromError(err) + s.True(ok) + s.Equal(codes.Unimplemented, st.Code()) + s.Equal(st.Message(), "GetFlightInfoExportedKeys not implemented") + s.Nil(info) +} + +func (s *UnimplementedFlightSqlServerSuite) TestGetImportedKeys() { + info, err := s.cl.GetImportedKeys(context.TODO(), flightsql.TableRef{}) + st, ok := status.FromError(err) + s.True(ok) + s.Equal(codes.Unimplemented, st.Code()) + s.Equal(st.Message(), "GetFlightInfoImportedKeys not implemented") + s.Nil(info) +} + +func (s *UnimplementedFlightSqlServerSuite) TestGetCrossReference() { + info, err := 
s.cl.GetCrossReference(context.TODO(), flightsql.TableRef{}, flightsql.TableRef{}) + st, ok := status.FromError(err) + s.True(ok) + s.Equal(codes.Unimplemented, st.Code()) + s.Equal(st.Message(), "GetFlightInfoCrossReference not implemented") + s.Nil(info) +} + +func (s *UnimplementedFlightSqlServerSuite) TestGetCatalogs() { + info, err := s.cl.GetCatalogs(context.TODO()) + st, ok := status.FromError(err) + s.True(ok) + s.Equal(codes.Unimplemented, st.Code()) + s.Equal(st.Message(), "GetFlightInfoCatalogs not implemented") + s.Nil(info) +} + +func (s *UnimplementedFlightSqlServerSuite) TestGetDBSchemas() { + info, err := s.cl.GetDBSchemas(context.TODO(), &flightsql.GetDBSchemasOpts{}) + st, ok := status.FromError(err) + s.True(ok) + s.Equal(codes.Unimplemented, st.Code()) + s.Equal(st.Message(), "GetFlightInfoSchemas not implemented") + s.Nil(info) +} + +func (s *UnimplementedFlightSqlServerSuite) TestGetTypeInfo() { + info, err := s.cl.GetXdbcTypeInfo(context.TODO(), nil) + st, ok := status.FromError(err) + s.True(ok) + s.Equal(codes.Unimplemented, st.Code()) + s.Equal(st.Message(), "GetFlightInfoXdbcTypeInfo not implemented") + s.Nil(info) +} + +func getTicket(cmd proto.Message) *flight.Ticket { + var anycmd anypb.Any + anycmd.MarshalFrom(cmd) + + data, _ := proto.Marshal(&anycmd) + return &flight.Ticket{ + Ticket: data, + } +} + +func (s *UnimplementedFlightSqlServerSuite) TestDoGet() { + tests := []struct { + name string + ticket proto.Message + }{ + {"DoGetStatement", &pb.TicketStatementQuery{}}, + {"DoGetPreparedStatement", &pb.CommandPreparedStatementQuery{}}, + {"DoGetCatalogs", &pb.CommandGetCatalogs{}}, + {"DoGetDBSchemas", &pb.CommandGetDbSchemas{}}, + {"DoGetTables", &pb.CommandGetTables{}}, + {"DoGetTableTypes", &pb.CommandGetTableTypes{}}, + {"DoGetXdbcTypeInfo", &pb.CommandGetXdbcTypeInfo{}}, + {"DoGetPrimaryKeys", &pb.CommandGetPrimaryKeys{}}, + {"DoGetExportedKeys", &pb.CommandGetExportedKeys{}}, + {"DoGetImportedKeys", &pb.CommandGetImportedKeys{}}, + {"DoGetCrossReference", &pb.CommandGetCrossReference{}}, + } + + for _, tt := range tests { + s.Run(tt.name, func() { + rdr, err := s.cl.DoGet(context.TODO(), getTicket(tt.ticket)) + s.Nil(rdr) + s.True(strings.HasSuffix(err.Error(), tt.name+" not implemented"), err.Error()) + }) + } +} + +func (s *UnimplementedFlightSqlServerSuite) TestDoAction() { + prep, err := s.cl.Prepare(context.TODO(), memory.DefaultAllocator, "IRRELEVANT") + s.Nil(prep) + st, ok := status.FromError(err) + s.True(ok) + s.Equal(codes.Unimplemented, st.Code()) + s.Equal(st.Message(), "CreatePreparedStatement not implemented") +} + +func TestBaseServer(t *testing.T) { + suite.Run(t, new(UnimplementedFlightSqlServerSuite)) +} diff --git a/go/arrow/flight/flightsql/sql_info.go b/go/arrow/flight/flightsql/sql_info.go new file mode 100644 index 0000000000000..687f10f6f5af6 --- /dev/null +++ b/go/arrow/flight/flightsql/sql_info.go @@ -0,0 +1,93 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package flightsql + +import ( + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" +) + +const ( + strValIdx arrow.UnionTypeCode = iota + boolValIdx + bigintValIdx + int32BitMaskIdx + strListIdx + int32ToInt32ListIdx +) + +// sqlInfoResultBldr is a helper for building up the dense union response +// of a SqlInfo request. +type sqlInfoResultBldr struct { + valueBldr *array.DenseUnionBuilder + + strBldr *array.StringBuilder + boolBldr *array.BooleanBuilder + bigintBldr *array.Int64Builder + int32BitmaskBldr *array.Int32Builder + strListBldr *array.ListBuilder + int32Toint32ListBldr *array.MapBuilder +} + +func newSqlInfoResultBuilder(valueBldr *array.DenseUnionBuilder) *sqlInfoResultBldr { + return &sqlInfoResultBldr{ + valueBldr: valueBldr, + strBldr: valueBldr.Child(int(strValIdx)).(*array.StringBuilder), + boolBldr: valueBldr.Child(int(boolValIdx)).(*array.BooleanBuilder), + bigintBldr: valueBldr.Child(int(bigintValIdx)).(*array.Int64Builder), + int32BitmaskBldr: valueBldr.Child(int(int32BitMaskIdx)).(*array.Int32Builder), + strListBldr: valueBldr.Child(int(strListIdx)).(*array.ListBuilder), + int32Toint32ListBldr: valueBldr.Child(int(int32ToInt32ListIdx)).(*array.MapBuilder), + } +} + +func (s *sqlInfoResultBldr) Append(v interface{}) { + switch v := v.(type) { + case string: + s.valueBldr.Append(strValIdx) + s.strBldr.Append(v) + case bool: + s.valueBldr.Append(boolValIdx) + s.boolBldr.Append(v) + case int64: + s.valueBldr.Append(bigintValIdx) + s.bigintBldr.Append(v) + case int32: + s.valueBldr.Append(int32BitMaskIdx) + s.int32BitmaskBldr.Append(v) + case []string: + s.valueBldr.Append(strListIdx) + s.strListBldr.Append(true) + chld := s.strListBldr.ValueBuilder().(*array.StringBuilder) + chld.AppendValues(v, nil) + case map[int32][]int32: + s.valueBldr.Append(int32ToInt32ListIdx) + s.int32Toint32ListBldr.Append(true) + + kb := s.int32Toint32ListBldr.KeyBuilder().(*array.Int32Builder) + ib := s.int32Toint32ListBldr.ItemBuilder().(*array.ListBuilder) + ch := ib.ValueBuilder().(*array.Int32Builder) + + for key, val := range v { + kb.Append(key) + ib.Append(true) + for _, c := range val { + ch.Append(c) + } + } + } +} diff --git a/go/arrow/flight/flightsql/sqlite_server_test.go b/go/arrow/flight/flightsql/sqlite_server_test.go new file mode 100644 index 0000000000000..15f8271ca2b09 --- /dev/null +++ b/go/arrow/flight/flightsql/sqlite_server_test.go @@ -0,0 +1,783 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build go1.18 +// +build go1.18 + +package flightsql_test + +import ( + "context" + "os" + "strings" + "testing" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/flight" + "github.com/apache/arrow/go/v10/arrow/flight/flightsql" + "github.com/apache/arrow/go/v10/arrow/flight/flightsql/example" + "github.com/apache/arrow/go/v10/arrow/flight/flightsql/schema_ref" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/scalar" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" + "google.golang.org/protobuf/proto" + sqlite3 "modernc.org/sqlite/lib" +) + +type FlightSqliteServerSuite struct { + suite.Suite + + srv *example.SQLiteFlightSQLServer + s flight.Server + cl *flightsql.Client + + mem *memory.CheckedAllocator +} + +func (s *FlightSqliteServerSuite) getColMetadata(colType int, table string) arrow.Metadata { + bldr := flightsql.NewColumnMetadataBuilder() + bldr.Scale(15).IsReadOnly(false).IsAutoIncrement(false) + if table != "" { + bldr.TableName(table) + } + switch colType { + case sqlite3.SQLITE_TEXT, sqlite3.SQLITE_BLOB: + case sqlite3.SQLITE_INTEGER: + bldr.Precision(10) + case sqlite3.SQLITE_FLOAT: + bldr.Precision(15) + default: + bldr.Precision(0) + } + return bldr.Metadata() +} + +func (s *FlightSqliteServerSuite) SetupTest() { + var err error + s.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) + s.s = flight.NewServerWithMiddleware(nil) + s.srv, err = example.NewSQLiteFlightSQLServer() + s.Require().NoError(err) + s.srv.Alloc = s.mem + + s.s.RegisterFlightService(flightsql.NewFlightServer(s.srv)) + s.s.Init("localhost:0") + s.s.SetShutdownOnSignals(os.Interrupt, os.Kill) + go s.s.Serve() + s.cl, err = flightsql.NewClient(s.s.Addr().String(), nil, nil, dialOpts...) 
+ s.Require().NoError(err) + s.Require().NotNil(s.cl) + s.cl.Alloc = s.mem +} + +func (s *FlightSqliteServerSuite) TearDownTest() { + s.Require().NoError(s.cl.Close()) + s.s.Shutdown() + s.srv = nil + s.mem.AssertSize(s.T(), 0) +} + +func (s *FlightSqliteServerSuite) fromJSON(dt arrow.DataType, json string) arrow.Array { + arr, _, _ := array.FromJSON(s.mem, dt, strings.NewReader(json)) + return arr +} + +func (s *FlightSqliteServerSuite) execCountQuery(query string) int64 { + info, err := s.cl.Execute(context.Background(), query) + s.NoError(err) + + rdr, err := s.cl.DoGet(context.Background(), info.Endpoint[0].Ticket) + s.NoError(err) + defer rdr.Release() + + rec, err := rdr.Read() + s.NoError(err) + return rec.Column(0).(*array.Int64).Value(0) +} + +func (s *FlightSqliteServerSuite) TestCommandStatementQuery() { + ctx := context.Background() + info, err := s.cl.Execute(ctx, "SELECT * FROM intTable") + s.NoError(err) + rdr, err := s.cl.DoGet(ctx, info.Endpoint[0].Ticket) + s.NoError(err) + defer rdr.Release() + + s.True(rdr.Next()) + rec := rdr.Record() + s.NotNil(rec) + + expectedSchema := arrow.NewSchema([]arrow.Field{ + {Name: "id", Type: arrow.PrimitiveTypes.Int64, Metadata: s.getColMetadata(sqlite3.SQLITE_INTEGER, ""), Nullable: true}, + {Name: "keyName", Type: arrow.BinaryTypes.String, Metadata: s.getColMetadata(sqlite3.SQLITE_TEXT, ""), Nullable: true}, + {Name: "value", Type: arrow.PrimitiveTypes.Int64, Metadata: s.getColMetadata(sqlite3.SQLITE_INTEGER, ""), Nullable: true}, + {Name: "foreignId", Type: arrow.PrimitiveTypes.Int64, Metadata: s.getColMetadata(sqlite3.SQLITE_INTEGER, ""), Nullable: true}, + }, nil) + + s.Truef(expectedSchema.Equal(rec.Schema()), "expected: %s\ngot: %s", expectedSchema, rec.Schema()) + + idarr := s.fromJSON(arrow.PrimitiveTypes.Int64, `[1, 2, 3, 4]`) + defer idarr.Release() + keyarr := s.fromJSON(arrow.BinaryTypes.String, `["one", "zero", "negative one", null]`) + defer keyarr.Release() + valarr := s.fromJSON(arrow.PrimitiveTypes.Int64, `[1, 0, -1, null]`) + defer valarr.Release() + foreignarr := s.fromJSON(arrow.PrimitiveTypes.Int64, `[1, 1, 1, null]`) + defer foreignarr.Release() + + expectedRec := array.NewRecord(expectedSchema, []arrow.Array{idarr, keyarr, valarr, foreignarr}, 4) + defer expectedRec.Release() + + s.Truef(array.RecordEqual(expectedRec, rec), "expected: %s\ngot: %s", expectedRec, rec) +} + +func (s *FlightSqliteServerSuite) TestCommandGetTables() { + ctx := context.Background() + info, err := s.cl.GetTables(ctx, &flightsql.GetTablesOpts{}) + s.NoError(err) + s.NotNil(info) + + rdr, err := s.cl.DoGet(ctx, info.Endpoint[0].Ticket) + s.NoError(err) + defer rdr.Release() + + catalogName := array.MakeArrayOfNull(s.mem, arrow.BinaryTypes.String, 3) + defer catalogName.Release() + schemaName := array.MakeArrayOfNull(s.mem, arrow.BinaryTypes.String, 3) + defer schemaName.Release() + + tableName := s.fromJSON(arrow.BinaryTypes.String, `["foreignTable", "intTable", "sqlite_sequence"]`) + defer tableName.Release() + + tableType := s.fromJSON(arrow.BinaryTypes.String, `["table", "table", "table"]`) + defer tableType.Release() + + expectedRec := array.NewRecord(schema_ref.Tables, []arrow.Array{catalogName, schemaName, tableName, tableType}, 3) + defer expectedRec.Release() + + s.True(rdr.Next()) + rec := rdr.Record() + s.NotNil(rec) + rec.Retain() + defer rec.Release() + s.False(rdr.Next()) + + s.Truef(array.RecordEqual(expectedRec, rec), "expected: %s\ngot: %s", expectedRec, rec) +} + +func (s *FlightSqliteServerSuite) 
TestCommandGetTablesWithTableFilter() { + ctx := context.Background() + info, err := s.cl.GetTables(ctx, &flightsql.GetTablesOpts{ + TableNameFilterPattern: proto.String("int%"), + }) + s.NoError(err) + s.NotNil(info) + + rdr, err := s.cl.DoGet(ctx, info.Endpoint[0].Ticket) + s.NoError(err) + defer rdr.Release() + + catalog := s.fromJSON(arrow.BinaryTypes.String, `[null]`) + schema := s.fromJSON(arrow.BinaryTypes.String, `[null]`) + table := s.fromJSON(arrow.BinaryTypes.String, `["intTable"]`) + tabletype := s.fromJSON(arrow.BinaryTypes.String, `["table"]`) + expected := array.NewRecord(schema_ref.Tables, []arrow.Array{catalog, schema, table, tabletype}, 1) + defer func() { + catalog.Release() + schema.Release() + table.Release() + tabletype.Release() + expected.Release() + }() + + s.True(rdr.Next()) + rec := rdr.Record() + s.NotNil(rec) + rec.Retain() + defer rec.Release() + s.False(rdr.Next()) + s.NoError(rdr.Err()) + + s.Truef(array.RecordEqual(expected, rec), "expected: %s\ngot: %s", expected, rec) +} + +func (s *FlightSqliteServerSuite) TestCommandGetTablesWithTableTypesFilter() { + ctx := context.Background() + info, err := s.cl.GetTables(ctx, &flightsql.GetTablesOpts{ + TableTypes: []string{"index"}, + }) + s.NoError(err) + + rdr, err := s.cl.DoGet(ctx, info.Endpoint[0].Ticket) + s.NoError(err) + defer rdr.Release() + + s.True(schema_ref.Tables.Equal(rdr.Schema()), rdr.Schema().String()) + s.False(rdr.Next()) +} + +func (s *FlightSqliteServerSuite) TestCommandGetTablesWithExistingTableTypeFilter() { + ctx := context.Background() + info, err := s.cl.GetTables(ctx, &flightsql.GetTablesOpts{ + TableTypes: []string{"table"}, + }) + s.NoError(err) + s.NotNil(info) + + rdr, err := s.cl.DoGet(ctx, info.Endpoint[0].Ticket) + s.NoError(err) + defer rdr.Release() + + catalogName := array.MakeArrayOfNull(s.mem, arrow.BinaryTypes.String, 3) + defer catalogName.Release() + schemaName := array.MakeArrayOfNull(s.mem, arrow.BinaryTypes.String, 3) + defer schemaName.Release() + + tableName := s.fromJSON(arrow.BinaryTypes.String, `["foreignTable", "intTable", "sqlite_sequence"]`) + defer tableName.Release() + + tableType := s.fromJSON(arrow.BinaryTypes.String, `["table", "table", "table"]`) + defer tableType.Release() + + expectedRec := array.NewRecord(schema_ref.Tables, []arrow.Array{catalogName, schemaName, tableName, tableType}, 3) + defer expectedRec.Release() + + s.True(rdr.Next()) + rec := rdr.Record() + s.NotNil(rec) + rec.Retain() + defer rec.Release() + s.False(rdr.Next()) + + s.Truef(array.RecordEqual(expectedRec, rec), "expected: %s\ngot: %s", expectedRec, rec) +} + +func (s *FlightSqliteServerSuite) TestCommandGetTablesWithIncludedSchemas() { + ctx := context.Background() + info, err := s.cl.GetTables(ctx, &flightsql.GetTablesOpts{ + TableNameFilterPattern: proto.String("int%"), + IncludeSchema: true, + }) + s.NoError(err) + s.NotNil(info) + + rdr, err := s.cl.DoGet(ctx, info.Endpoint[0].Ticket) + s.NoError(err) + defer rdr.Release() + + catalog := s.fromJSON(arrow.BinaryTypes.String, `[null]`) + schema := s.fromJSON(arrow.BinaryTypes.String, `[null]`) + table := s.fromJSON(arrow.BinaryTypes.String, `["intTable"]`) + tabletype := s.fromJSON(arrow.BinaryTypes.String, `["table"]`) + + dbTableName := "intTable" + + tableSchema := arrow.NewSchema([]arrow.Field{ + {Name: "id", Type: arrow.PrimitiveTypes.Int64, + Metadata: s.getColMetadata(sqlite3.SQLITE_INTEGER, dbTableName)}, + {Name: "keyName", Type: arrow.BinaryTypes.String, + Metadata: s.getColMetadata(sqlite3.SQLITE_TEXT, dbTableName)}, + 
{Name: "value", Type: arrow.PrimitiveTypes.Int64, + Metadata: s.getColMetadata(sqlite3.SQLITE_INTEGER, dbTableName)}, + {Name: "foreignId", Type: arrow.PrimitiveTypes.Int64, + Metadata: s.getColMetadata(sqlite3.SQLITE_INTEGER, dbTableName)}, + }, nil) + schemaBuf := flight.SerializeSchema(tableSchema, s.mem) + binaryBldr := array.NewBinaryBuilder(s.mem, arrow.BinaryTypes.Binary) + binaryBldr.Append(schemaBuf) + schemaCol := binaryBldr.NewArray() + + expected := array.NewRecord(schema_ref.TablesWithIncludedSchema, []arrow.Array{catalog, schema, table, tabletype, schemaCol}, 1) + defer func() { + catalog.Release() + schema.Release() + table.Release() + tabletype.Release() + binaryBldr.Release() + schemaCol.Release() + expected.Release() + }() + + s.True(rdr.Next()) + rec := rdr.Record() + s.NotNil(rec) + rec.Retain() + defer rec.Release() + s.False(rdr.Next()) + s.NoError(rdr.Err()) + + s.Truef(array.RecordEqual(expected, rec), "expected: %s\ngot: %s", expected, rec) +} + +func (s *FlightSqliteServerSuite) TestCommandGetTypeInfo() { + ctx := context.Background() + info, err := s.cl.GetXdbcTypeInfo(ctx, nil) + s.NoError(err) + rdr, err := s.cl.DoGet(ctx, info.Endpoint[0].Ticket) + s.NoError(err) + defer rdr.Release() + + expected := example.GetTypeInfoResult(s.mem) + defer expected.Release() + + s.True(rdr.Next()) + rec := rdr.Record() + s.Truef(array.RecordEqual(expected, rec), "expected: %s\ngot: %s", expected, rec) + s.False(rdr.Next()) +} + +func (s *FlightSqliteServerSuite) TestCommandGetTypeInfoFiltered() { + ctx := context.Background() + info, err := s.cl.GetXdbcTypeInfo(ctx, proto.Int32(-4)) + s.NoError(err) + rdr, err := s.cl.DoGet(ctx, info.Endpoint[0].Ticket) + s.NoError(err) + defer rdr.Release() + + expected := example.GetFilteredTypeInfoResult(s.mem, -4) + defer expected.Release() + + s.True(rdr.Next()) + rec := rdr.Record() + s.Truef(array.RecordEqual(expected, rec), "expected: %s\ngot: %s", expected, rec) + s.False(rdr.Next()) +} + +func (s *FlightSqliteServerSuite) TestCommandGetCatalogs() { + ctx := context.Background() + info, err := s.cl.GetCatalogs(ctx) + s.NoError(err) + rdr, err := s.cl.DoGet(ctx, info.Endpoint[0].Ticket) + s.NoError(err) + defer rdr.Release() + + s.True(rdr.Schema().Equal(schema_ref.Catalogs), rdr.Schema().String()) + s.False(rdr.Next()) +} + +func (s *FlightSqliteServerSuite) TestCommandGetDbSchemas() { + ctx := context.Background() + info, err := s.cl.GetDBSchemas(ctx, &flightsql.GetDBSchemasOpts{}) + s.NoError(err) + rdr, err := s.cl.DoGet(ctx, info.Endpoint[0].Ticket) + s.NoError(err) + defer rdr.Release() + + s.True(rdr.Schema().Equal(schema_ref.DBSchemas), rdr.Schema().String()) + s.False(rdr.Next()) +} + +func (s *FlightSqliteServerSuite) TestCommandGetTableTypes() { + ctx := context.Background() + info, err := s.cl.GetTableTypes(ctx) + s.NoError(err) + rdr, err := s.cl.DoGet(ctx, info.Endpoint[0].Ticket) + s.NoError(err) + defer rdr.Release() + + expected := s.fromJSON(arrow.BinaryTypes.String, `["table"]`) + defer expected.Release() + expectedRec := array.NewRecord(schema_ref.TableTypes, []arrow.Array{expected}, 1) + defer expectedRec.Release() + + s.True(rdr.Next()) + rec := rdr.Record() + s.Truef(array.RecordEqual(expectedRec, rec), "expected: %s\ngot: %s", expected, rec) + s.False(rdr.Next()) +} + +func (s *FlightSqliteServerSuite) TestCommandStatementUpdate() { + ctx := context.Background() + result, err := s.cl.ExecuteUpdate(ctx, `INSERT INTO intTable (keyName, value) VALUES + ('KEYNAME1', 1001), ('KEYNAME2', 1002), ('KEYNAME3', 1003)`) + 
s.NoError(err) + s.EqualValues(3, result) + + result, err = s.cl.ExecuteUpdate(ctx, `UPDATE intTable SET keyName = 'KEYNAME1' + WHERE keyName = 'KEYNAME2' OR keyName = 'KEYNAME3'`) + s.NoError(err) + s.EqualValues(2, result) + + result, err = s.cl.ExecuteUpdate(ctx, `DELETE FROM intTable WHERE keyName = 'KEYNAME1'`) + s.NoError(err) + s.EqualValues(3, result) +} + +func (s *FlightSqliteServerSuite) TestCommandPreparedStatementQuery() { + ctx := context.Background() + prep, err := s.cl.Prepare(ctx, s.mem, "SELECT * FROM intTable") + s.NoError(err) + defer prep.Close(ctx) + + info, err := prep.Execute(ctx) + s.NoError(err) + rdr, err := s.cl.DoGet(ctx, info.Endpoint[0].Ticket) + s.NoError(err) + + expectedSchema := arrow.NewSchema([]arrow.Field{ + {Name: "id", Type: arrow.PrimitiveTypes.Int64, Metadata: s.getColMetadata(sqlite3.SQLITE_INTEGER, ""), Nullable: true}, + {Name: "keyName", Type: arrow.BinaryTypes.String, Metadata: s.getColMetadata(sqlite3.SQLITE_TEXT, ""), Nullable: true}, + {Name: "value", Type: arrow.PrimitiveTypes.Int64, Metadata: s.getColMetadata(sqlite3.SQLITE_INTEGER, ""), Nullable: true}, + {Name: "foreignId", Type: arrow.PrimitiveTypes.Int64, Metadata: s.getColMetadata(sqlite3.SQLITE_INTEGER, ""), Nullable: true}}, nil) + + idArr := s.fromJSON(arrow.PrimitiveTypes.Int64, `[1, 2, 3, 4]`) + defer idArr.Release() + keyNameArr := s.fromJSON(arrow.BinaryTypes.String, `["one", "zero", "negative one", null]`) + defer keyNameArr.Release() + valueArr := s.fromJSON(arrow.PrimitiveTypes.Int64, `[1, 0, -1, null]`) + defer valueArr.Release() + foreignIdArr := s.fromJSON(arrow.PrimitiveTypes.Int64, `[1, 1, 1, null]`) + defer foreignIdArr.Release() + + expected := array.NewRecord(expectedSchema, []arrow.Array{idArr, keyNameArr, valueArr, foreignIdArr}, 4) + defer expected.Release() + + s.True(rdr.Next()) + rec := rdr.Record() + s.Truef(array.RecordEqual(expected, rec), "expected: %s\ngot: %s", expected, rec) + s.False(rdr.Next()) +} + +func (s *FlightSqliteServerSuite) TestCommandPreparedStatementQueryWithParams() { + ctx := context.Background() + stmt, err := s.cl.Prepare(ctx, s.mem, "SELECT * FROM intTable WHERE keyName LIKE ?") + s.NoError(err) + defer stmt.Close(ctx) + + typeIDs := s.fromJSON(arrow.PrimitiveTypes.Int8, "[0]") + offsets := s.fromJSON(arrow.PrimitiveTypes.Int32, "[0]") + strArray := s.fromJSON(arrow.BinaryTypes.String, `["%one"]`) + bytesArr := s.fromJSON(arrow.BinaryTypes.Binary, "[]") + bigintArr := s.fromJSON(arrow.PrimitiveTypes.Int64, "[]") + dblArr := s.fromJSON(arrow.PrimitiveTypes.Float64, "[]") + paramArr, _ := array.NewDenseUnionFromArraysWithFields(typeIDs, + offsets, []arrow.Array{strArray, bytesArr, bigintArr, dblArr}, + []string{"string", "bytes", "bigint", "double"}) + batch := array.NewRecord(arrow.NewSchema([]arrow.Field{ + {Name: "parameter_1", Type: paramArr.DataType()}}, nil), + []arrow.Array{paramArr}, 1) + defer func() { + typeIDs.Release() + offsets.Release() + strArray.Release() + bytesArr.Release() + bigintArr.Release() + dblArr.Release() + paramArr.Release() + batch.Release() + }() + + stmt.SetParameters(batch) + info, err := stmt.Execute(ctx) + s.NoError(err) + rdr, err := s.cl.DoGet(ctx, info.Endpoint[0].Ticket) + s.NoError(err) + + expectedSchema := arrow.NewSchema([]arrow.Field{ + {Name: "id", Type: arrow.PrimitiveTypes.Int64, Metadata: s.getColMetadata(sqlite3.SQLITE_INTEGER, ""), Nullable: true}, + {Name: "keyName", Type: arrow.BinaryTypes.String, Metadata: s.getColMetadata(sqlite3.SQLITE_TEXT, ""), Nullable: true}, + {Name: "value", 
Type: arrow.PrimitiveTypes.Int64, Metadata: s.getColMetadata(sqlite3.SQLITE_INTEGER, ""), Nullable: true}, + {Name: "foreignId", Type: arrow.PrimitiveTypes.Int64, Metadata: s.getColMetadata(sqlite3.SQLITE_INTEGER, ""), Nullable: true}}, nil) + + idArr := s.fromJSON(arrow.PrimitiveTypes.Int64, `[1, 3]`) + defer idArr.Release() + keyNameArr := s.fromJSON(arrow.BinaryTypes.String, `["one", "negative one"]`) + defer keyNameArr.Release() + valueArr := s.fromJSON(arrow.PrimitiveTypes.Int64, `[1, -1]`) + defer valueArr.Release() + foreignIdArr := s.fromJSON(arrow.PrimitiveTypes.Int64, `[1, 1]`) + defer foreignIdArr.Release() + + expected := array.NewRecord(expectedSchema, []arrow.Array{idArr, keyNameArr, valueArr, foreignIdArr}, 2) + defer expected.Release() + + s.True(rdr.Next()) + rec := rdr.Record() + s.Truef(array.RecordEqual(expected, rec), "expected: %s\ngot: %s", expected, rec) + s.False(rdr.Next()) +} + +func (s *FlightSqliteServerSuite) TestCommandPreparedStatementUpdateWithParams() { + ctx := context.Background() + stmt, err := s.cl.Prepare(ctx, s.mem, "INSERT INTO intTable (keyName, value) VALUES ('new_value', ?)") + s.NoError(err) + defer stmt.Close(ctx) + + typeIDs := s.fromJSON(arrow.PrimitiveTypes.Int8, "[2]") + offsets := s.fromJSON(arrow.PrimitiveTypes.Int32, "[0]") + strArray := s.fromJSON(arrow.BinaryTypes.String, "[]") + bytesArr := s.fromJSON(arrow.BinaryTypes.Binary, "[]") + bigintArr := s.fromJSON(arrow.PrimitiveTypes.Int64, "[999]") + dblArr := s.fromJSON(arrow.PrimitiveTypes.Float64, "[]") + paramArr, err := array.NewDenseUnionFromArraysWithFields(typeIDs, + offsets, []arrow.Array{strArray, bytesArr, bigintArr, dblArr}, + []string{"string", "bytes", "bigint", "double"}) + s.NoError(err) + batch := array.NewRecord(arrow.NewSchema([]arrow.Field{ + {Name: "parameter_1", Type: paramArr.DataType()}}, nil), + []arrow.Array{paramArr}, 1) + defer func() { + typeIDs.Release() + offsets.Release() + strArray.Release() + bytesArr.Release() + bigintArr.Release() + dblArr.Release() + paramArr.Release() + batch.Release() + }() + + stmt.SetParameters(batch) + s.EqualValues(4, s.execCountQuery("SELECT COUNT(*) FROM intTable")) + n, err := stmt.ExecuteUpdate(context.Background()) + s.NoError(err) + s.EqualValues(1, n) + s.EqualValues(5, s.execCountQuery("SELECT COUNT(*) FROM intTable")) + n, err = s.cl.ExecuteUpdate(context.Background(), "DELETE FROM intTable WHERE keyName = 'new_value'") + s.NoError(err) + s.EqualValues(1, n) + s.EqualValues(4, s.execCountQuery("SELECT COUNT(*) FROM intTable")) +} + +func (s *FlightSqliteServerSuite) TestCommandPreparedStatementUpdate() { + ctx := context.Background() + stmt, err := s.cl.Prepare(ctx, s.mem, "INSERT INTO intTable (keyName, value) VALUES ('new_value', 999)") + s.NoError(err) + defer stmt.Close(ctx) + + s.EqualValues(4, s.execCountQuery("SELECT COUNT(*) FROM intTable")) + result, err := stmt.ExecuteUpdate(ctx) + s.NoError(err) + s.EqualValues(1, result) + s.EqualValues(5, s.execCountQuery("SELECT COUNT(*) FROM intTable")) + result, err = s.cl.ExecuteUpdate(ctx, "DELETE FROM intTable WHERE keyName = 'new_value'") + s.NoError(err) + s.EqualValues(1, result) + s.EqualValues(4, s.execCountQuery("SELECT COUNT(*) FROM intTable")) +} + +func (s *FlightSqliteServerSuite) TestCommandGetPrimaryKeys() { + ctx := context.Background() + info, err := s.cl.GetPrimaryKeys(ctx, flightsql.TableRef{Table: "int%"}) + s.NoError(err) + rdr, err := s.cl.DoGet(ctx, info.Endpoint[0].Ticket) + s.NoError(err) + defer rdr.Release() + + bldr := 
array.NewRecordBuilder(s.mem, schema_ref.PrimaryKeys) + defer bldr.Release() + bldr.Field(0).AppendNull() + bldr.Field(1).AppendNull() + bldr.Field(2).(*array.StringBuilder).Append("intTable") + bldr.Field(3).(*array.StringBuilder).Append("id") + bldr.Field(4).(*array.Int32Builder).Append(1) + bldr.Field(5).AppendNull() + expected := bldr.NewRecord() + defer expected.Release() + + s.True(rdr.Next()) + rec := rdr.Record() + s.Truef(array.RecordEqual(expected, rec), "expected: %s\ngot: %s", expected, rec) + s.False(rdr.Next()) +} + +func (s *FlightSqliteServerSuite) TestCommandGetImportedKeys() { + ctx := context.Background() + info, err := s.cl.GetImportedKeys(ctx, flightsql.TableRef{Table: "intTable"}) + s.NoError(err) + rdr, err := s.cl.DoGet(ctx, info.Endpoint[0].Ticket) + s.NoError(err) + defer rdr.Release() + + bldr := array.NewRecordBuilder(s.mem, schema_ref.ImportedKeys) + defer bldr.Release() + bldr.Field(0).AppendNull() + bldr.Field(1).AppendNull() + bldr.Field(2).(*array.StringBuilder).Append("foreignTable") + bldr.Field(3).(*array.StringBuilder).Append("id") + bldr.Field(4).AppendNull() + bldr.Field(5).AppendNull() + bldr.Field(6).(*array.StringBuilder).Append("intTable") + bldr.Field(7).(*array.StringBuilder).Append("foreignId") + bldr.Field(8).(*array.Int32Builder).Append(0) + bldr.Field(9).AppendNull() + bldr.Field(10).AppendNull() + bldr.Field(11).(*array.Uint8Builder).Append(3) + bldr.Field(12).(*array.Uint8Builder).Append(3) + expected := bldr.NewRecord() + defer expected.Release() + + s.True(rdr.Next()) + rec := rdr.Record() + s.Truef(array.RecordEqual(expected, rec), "expected: %s\ngot: %s", expected, rec) + s.False(rdr.Next()) +} + +func (s *FlightSqliteServerSuite) TestCommandGetExportedKeys() { + ctx := context.Background() + info, err := s.cl.GetExportedKeys(ctx, flightsql.TableRef{Table: "foreignTable"}) + s.NoError(err) + rdr, err := s.cl.DoGet(ctx, info.Endpoint[0].Ticket) + s.NoError(err) + defer rdr.Release() + + bldr := array.NewRecordBuilder(s.mem, schema_ref.ImportedKeys) + defer bldr.Release() + bldr.Field(0).AppendNull() + bldr.Field(1).AppendNull() + bldr.Field(2).(*array.StringBuilder).Append("foreignTable") + bldr.Field(3).(*array.StringBuilder).Append("id") + bldr.Field(4).AppendNull() + bldr.Field(5).AppendNull() + bldr.Field(6).(*array.StringBuilder).Append("intTable") + bldr.Field(7).(*array.StringBuilder).Append("foreignId") + bldr.Field(8).(*array.Int32Builder).Append(0) + bldr.Field(9).AppendNull() + bldr.Field(10).AppendNull() + bldr.Field(11).(*array.Uint8Builder).Append(3) + bldr.Field(12).(*array.Uint8Builder).Append(3) + expected := bldr.NewRecord() + defer expected.Release() + + s.True(rdr.Next()) + rec := rdr.Record() + s.Truef(array.RecordEqual(expected, rec), "expected: %s\ngot: %s", expected, rec) + s.False(rdr.Next()) +} + +func (s *FlightSqliteServerSuite) TestCommandGetCrossRef() { + ctx := context.Background() + info, err := s.cl.GetCrossReference(ctx, + flightsql.TableRef{Table: "foreignTable"}, + flightsql.TableRef{Table: "intTable"}) + s.NoError(err) + rdr, err := s.cl.DoGet(ctx, info.Endpoint[0].Ticket) + s.NoError(err) + defer rdr.Release() + + bldr := array.NewRecordBuilder(s.mem, schema_ref.ImportedKeys) + defer bldr.Release() + bldr.Field(0).AppendNull() + bldr.Field(1).AppendNull() + bldr.Field(2).(*array.StringBuilder).Append("foreignTable") + bldr.Field(3).(*array.StringBuilder).Append("id") + bldr.Field(4).AppendNull() + bldr.Field(5).AppendNull() + bldr.Field(6).(*array.StringBuilder).Append("intTable") + 
bldr.Field(7).(*array.StringBuilder).Append("foreignId") + bldr.Field(8).(*array.Int32Builder).Append(0) + bldr.Field(9).AppendNull() + bldr.Field(10).AppendNull() + bldr.Field(11).(*array.Uint8Builder).Append(3) + bldr.Field(12).(*array.Uint8Builder).Append(3) + expected := bldr.NewRecord() + defer expected.Release() + + s.True(rdr.Next()) + rec := rdr.Record() + s.Truef(array.RecordEqual(expected, rec), "expected: %s\ngot: %s", expected, rec) + s.False(rdr.Next()) +} + +func validateSqlInfo(t *testing.T, expected interface{}, sc scalar.Scalar) bool { + switch ex := expected.(type) { + case string: + return assert.Equal(t, ex, sc.String()) + case bool: + return assert.Equal(t, ex, sc.(*scalar.Boolean).Value) + case int64: + return assert.Equal(t, ex, sc.(*scalar.Int64).Value) + case int32: + return assert.Equal(t, ex, sc.(*scalar.Int32).Value) + case []string: + arr := sc.(*scalar.List).Value.(*array.String) + assert.EqualValues(t, len(ex), arr.Len()) + for i, v := range ex { + assert.Equal(t, v, arr.Value(i)) + } + case map[int32][]int32: + // map is a list of structs with key and values + structArr := sc.(*scalar.Map).Value.(*array.Struct) + keys := structArr.Field(0).(*array.Int32) + values := structArr.Field(1).(*array.List) + // assert that the map has the right size + assert.EqualValues(t, len(ex), keys.Len()) + + // for each element, match the argument + for i := 0; i < keys.Len(); i++ { + keyScalar, _ := scalar.GetScalar(keys, i) + infoID := keyScalar.(*scalar.Int32).Value + + // assert the key exists + list, ok := ex[infoID] + assert.True(t, ok) + + // assert the int32list is the right size + start, end := values.ValueOffsets(i) + assert.EqualValues(t, len(list), end-start) + + // for each element make sure it matches + for j, v := range list { + listItem, err := scalar.GetScalar(values.ListValues(), int(start)+j) + assert.NoError(t, err) + assert.Equal(t, v, listItem.(*scalar.Int32).Value) + } + } + } + return true +} + +func (s *FlightSqliteServerSuite) TestCommandGetSqlInfo() { + expectedResults := example.SqlInfoResultMap() + infoIDs := make([]flightsql.SqlInfo, 0, len(expectedResults)) + for k := range expectedResults { + infoIDs = append(infoIDs, flightsql.SqlInfo(k)) + } + + ctx := context.Background() + info, err := s.cl.GetSqlInfo(ctx, infoIDs) + s.NoError(err) + rdr, err := s.cl.DoGet(ctx, info.Endpoint[0].Ticket) + s.NoError(err) + defer rdr.Release() + + s.True(rdr.Next()) + rec := rdr.Record() + rec.Retain() + defer rec.Release() + s.False(rdr.Next()) + + s.EqualValues(2, rec.NumCols()) + s.EqualValues(len(expectedResults), rec.NumRows()) + + colName := rec.Column(0).(*array.Uint32) + colValue := rec.Column(1) + for i := 0; i < int(rec.NumRows()); i++ { + expected := expectedResults[colName.Value(i)] + sc, err := scalar.GetScalar(colValue, i) + s.NoError(err) + + s.True(validateSqlInfo(s.T(), expected, sc.(*scalar.DenseUnion).ChildValue())) + + sc.(*scalar.DenseUnion).Release() + } +} + +func TestSqliteServer(t *testing.T) { + suite.Run(t, new(FlightSqliteServerSuite)) +} diff --git a/go/arrow/flight/flightsql/types.go b/go/arrow/flight/flightsql/types.go new file mode 100644 index 0000000000000..5e033d00ee322 --- /dev/null +++ b/go/arrow/flight/flightsql/types.go @@ -0,0 +1,745 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package flightsql + +import ( + pb "github.com/apache/arrow/go/v10/arrow/flight/internal/flight" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/anypb" +) + +// Constants for Action types +const ( + CreatePreparedStatementActionType = "CreatePreparedStatement" + ClosePreparedStatementActionType = "ClosePreparedStatement" +) + +func toCrossTableRef(cmd *pb.CommandGetCrossReference) CrossTableRef { + return CrossTableRef{ + PKRef: TableRef{ + Catalog: cmd.PkCatalog, + DBSchema: cmd.PkDbSchema, + Table: cmd.PkTable, + }, + FKRef: TableRef{ + Catalog: cmd.FkCatalog, + DBSchema: cmd.FkDbSchema, + Table: cmd.FkTable, + }, + } +} + +func pkToTableRef(cmd *pb.CommandGetPrimaryKeys) TableRef { + return TableRef{ + Catalog: cmd.Catalog, + DBSchema: cmd.DbSchema, + Table: cmd.Table, + } +} + +func exkToTableRef(cmd *pb.CommandGetExportedKeys) TableRef { + return TableRef{ + Catalog: cmd.Catalog, + DBSchema: cmd.DbSchema, + Table: cmd.Table, + } +} + +func impkToTableRef(cmd *pb.CommandGetImportedKeys) TableRef { + return TableRef{ + Catalog: cmd.Catalog, + DBSchema: cmd.DbSchema, + Table: cmd.Table, + } +} + +// CreateStatementQueryTicket is a helper that constructs a properly +// serialized TicketStatementQuery containing a given opaque binary handle +// for use with constructing a ticket to return from GetFlightInfoStatement. +func CreateStatementQueryTicket(handle []byte) ([]byte, error) { + query := &pb.TicketStatementQuery{StatementHandle: handle} + var ticket anypb.Any + ticket.MarshalFrom(query) + + return proto.Marshal(&ticket) +} + +type ( + // GetDBSchemasOpts contains the options to request Database Schemas: + // an optional Catalog and a Schema Name filter pattern. + GetDBSchemasOpts pb.CommandGetDbSchemas + // GetTablesOpts contains the options for retrieving a list of tables: + // optional Catalog, Schema filter pattern, Table name filter pattern, + // a filter of table types, and whether or not to include the schema + // in the response. + GetTablesOpts pb.CommandGetTables + + // SqlInfoResultMap is a mapping of SqlInfo ids to the desired response. + // This is part of a Server and used for registering responses to a + // SqlInfo request. + SqlInfoResultMap map[uint32]interface{} + + // TableRef is a helpful struct for referencing a specific Table + // by its catalog, schema, and table name. + TableRef struct { + // Catalog specifies the catalog this table belongs to. + // An empty string refers to tables without a catalog. + // If nil, can reference a table in any catalog. + Catalog *string + // DBSchema specifies the database schema the table belongs to. + // An empty string refers to a table which does not belong to + // a database schema. + // If nil, can reference a table in any database schema. + DBSchema *string + // Table is the name of the table that is being referenced. 
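As a hedged illustration of the CreateStatementQueryTicket helper above (editorial, not part of this patch): a server answering GetFlightInfoStatement could wrap an opaque, server-defined handle into a single-endpoint FlightInfo roughly as sketched below. The statementInfo name, the package name, and the handle contents are assumptions made for the sketch; only CreateStatementQueryTicket and the flight types come from the Arrow packages.

package flightsqlexample

import (
	"github.com/apache/arrow/go/v10/arrow/flight"
	"github.com/apache/arrow/go/v10/arrow/flight/flightsql"
)

// statementInfo (hypothetical helper) wraps an opaque, server-defined statement
// handle into a single-endpoint FlightInfo whose ticket a client later redeems
// with DoGet, much like the example server exercised by the tests above.
func statementInfo(desc *flight.FlightDescriptor, handle []byte) (*flight.FlightInfo, error) {
	tkt, err := flightsql.CreateStatementQueryTicket(handle)
	if err != nil {
		return nil, err
	}
	return &flight.FlightInfo{
		FlightDescriptor: desc,
		Endpoint: []*flight.FlightEndpoint{{
			Ticket: &flight.Ticket{Ticket: tkt},
		}},
		// Per the Flight protocol comments later in this diff, -1 means
		// "unknown" for both totals.
		TotalRecords: -1,
		TotalBytes:   -1,
	}, nil
}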
+ Table string + } + + // CrossTableRef contains a reference to a Primary Key table + // and a Foreign Key table. + CrossTableRef struct { + PKRef TableRef + FKRef TableRef + } + + // since we are hiding the Protobuf internals in an internal + // package, we need to provide enum values for the SqlInfo enum here + SqlInfo uint32 +) + +// SqlInfo enum values +const ( + // Server Information + // Values [0-500): Provide information about the Flight SQL Server itself + + // Retrieves a UTF-8 string with the name of the Flight SQL Server. + SqlInfoFlightSqlServerName = SqlInfo(pb.SqlInfo_FLIGHT_SQL_SERVER_NAME) + // Retrieves a UTF-8 string with the native version of the Flight SQL Server. + SqlInfoFlightSqlServerVersion = SqlInfo(pb.SqlInfo_FLIGHT_SQL_SERVER_VERSION) + // Retrieves a UTF-8 string with the Arrow format version of the Flight SQL Server. + SqlInfoFlightSqlServerArrowVersion = SqlInfo(pb.SqlInfo_FLIGHT_SQL_SERVER_ARROW_VERSION) + + // Retrieves a boolean value indicating whether the Flight SQL Server is read only. + // + // Returns: + // - false: if read-write + // - true: if read only + SqlInfoFlightSqlServerReadOnly = SqlInfo(pb.SqlInfo_FLIGHT_SQL_SERVER_READ_ONLY) + + // SQL Syntax Information + // Values [500-1000): provide information about the supported SQL Syntax + + // Retrieves a boolean value indicating whether the Flight SQL Server supports CREATE and DROP of catalogs. + // + // Returns: + // - false: if it doesn't support CREATE and DROP of catalogs. + // - true: if it supports CREATE and DROP of catalogs. + SqlInfoDDLCatalog = SqlInfo(pb.SqlInfo_SQL_DDL_CATALOG) + + // Retrieves a boolean value indicating whether the Flight SQL Server supports CREATE and DROP of schemas. + // + // Returns: + // - false: if it doesn't support CREATE and DROP of schemas. + // - true: if it supports CREATE and DROP of schemas. + SqlInfoDDLSchema = SqlInfo(pb.SqlInfo_SQL_DDL_SCHEMA) + + // Indicates whether the Flight SQL Server supports CREATE and DROP of tables. + // + // Returns: + // - false: if it doesn't support CREATE and DROP of tables. + // - true: if it supports CREATE and DROP of tables. + SqlInfoDDLTable = SqlInfo(pb.SqlInfo_SQL_DDL_TABLE) + + // Retrieves a int32 ordinal representing the case sensitivity of catalog, table, schema and table names. + // + // The possible values are listed in `arrow.flight.protocol.sql.SqlSupportedCaseSensitivity`. + SqlInfoIdentifierCase = SqlInfo(pb.SqlInfo_SQL_IDENTIFIER_CASE) + // Retrieves a UTF-8 string with the supported character(s) used to surround a delimited identifier. + SqlInfoIdentifierQuoteChar = SqlInfo(pb.SqlInfo_SQL_IDENTIFIER_QUOTE_CHAR) + + // Retrieves a int32 describing the case sensitivity of quoted identifiers. + // + // The possible values are listed in `arrow.flight.protocol.sql.SqlSupportedCaseSensitivity`. + SqlInfoQuotedIdentifierCase = SqlInfo(pb.SqlInfo_SQL_QUOTED_IDENTIFIER_CASE) + + // Retrieves a boolean value indicating whether all tables are selectable. + // + // Returns: + // - false: if not all tables are selectable or if none are; + // - true: if all tables are selectable. + SqlInfoAllTablesAreASelectable = SqlInfo(pb.SqlInfo_SQL_ALL_TABLES_ARE_SELECTABLE) + + // Retrieves the null ordering. + // + // Returns a int32 ordinal for the null ordering being used, as described in + // `arrow.flight.protocol.sql.SqlNullOrdering`. + SqlInfoNullOrdering = SqlInfo(pb.SqlInfo_SQL_NULL_ORDERING) + // Retrieves a UTF-8 string list with values of the supported keywords. 
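A hedged sketch of how the TableRef defined above is filled in from client code, mirroring the suite's TestCommandGetPrimaryKeys; the lookupKeys helper and the *flightsql.Client parameter type are assumptions for illustration, while GetPrimaryKeys and proto.String are used exactly as in the tests. A nil Catalog or DBSchema matches any value, whereas an explicit empty string selects tables that have none.

package flightsqlexample

import (
	"context"

	"github.com/apache/arrow/go/v10/arrow/flight/flightsql"
	"google.golang.org/protobuf/proto"
)

// lookupKeys (hypothetical helper) asks a Flight SQL server for the primary
// keys of intTable, first matching any catalog, then only tables that belong
// to no catalog at all.
func lookupKeys(ctx context.Context, cl *flightsql.Client) error {
	anyCatalog := flightsql.TableRef{Table: "intTable"}
	if _, err := cl.GetPrimaryKeys(ctx, anyCatalog); err != nil {
		return err
	}

	// An empty (but non-nil) catalog restricts the lookup to tables without
	// a catalog, which is a different request than leaving the field nil.
	noCatalog := flightsql.TableRef{Catalog: proto.String(""), Table: "intTable"}
	_, err := cl.GetPrimaryKeys(ctx, noCatalog)
	return err
}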
+ SqlInfoKeywords = SqlInfo(pb.SqlInfo_SQL_KEYWORDS) + // Retrieves a UTF-8 string list with values of the supported numeric functions. + SqlInfoNumericFunctions = SqlInfo(pb.SqlInfo_SQL_NUMERIC_FUNCTIONS) + // Retrieves a UTF-8 string list with values of the supported string functions. + SqlInfoStringFunctions = SqlInfo(pb.SqlInfo_SQL_STRING_FUNCTIONS) + // Retrieves a UTF-8 string list with values of the supported system functions. + SqlInfoSystemFunctions = SqlInfo(pb.SqlInfo_SQL_SYSTEM_FUNCTIONS) + // Retrieves a UTF-8 string list with values of the supported datetime functions. + SqlInfoDateTimeFunctions = SqlInfo(pb.SqlInfo_SQL_DATETIME_FUNCTIONS) + + // Retrieves the UTF-8 string that can be used to escape wildcard characters. + // This is the string that can be used to escape '_' or '%' in the catalog search parameters that are a pattern + // (and therefore use one of the wildcard characters). + // The '_' character represents any single character; the '%' character represents any sequence of zero or more + // characters. + SqlInfoSearchStringEscape = SqlInfo(pb.SqlInfo_SQL_SEARCH_STRING_ESCAPE) + + // Retrieves a UTF-8 string with all the "extra" characters that can be used in unquoted identifier names + // (those beyond a-z, A-Z, 0-9 and _). + SqlInfoExtraNameChars = SqlInfo(pb.SqlInfo_SQL_EXTRA_NAME_CHARACTERS) + + // Retrieves a boolean value indicating whether column aliasing is supported. + // If so, the SQL AS clause can be used to provide names for computed columns or to provide alias names for columns + // as required. + // + // Returns: + // - false: if column aliasing is unsupported; + // - true: if column aliasing is supported. + SqlInfoSupportsColumnAliasing = SqlInfo(pb.SqlInfo_SQL_SUPPORTS_COLUMN_ALIASING) + + // Retrieves a boolean value indicating whether concatenations between null and non-null values being + // null are supported. + // + // - Returns: + // - false: if concatenations between null and non-null values being null are unsupported; + // - true: if concatenations between null and non-null values being null are supported. + SqlInfoNullPlusNullIsNull = SqlInfo(pb.SqlInfo_SQL_NULL_PLUS_NULL_IS_NULL) + + // Retrieves a map where the key is the type to convert from and the value is a list with the types to convert to, + // indicating the supported conversions. Each key and each item on the list value is a value to a predefined type on + // SqlSupportsConvert enum. + // The returned map will be: map> + SqlInfoSupportsConvert = SqlInfo(pb.SqlInfo_SQL_SUPPORTS_CONVERT) + + // Retrieves a boolean value indicating whether, when table correlation names are supported, + // they are restricted to being different from the names of the tables. + // + // Returns: + // - false: if table correlation names are unsupported; + // - true: if table correlation names are supported. + SqlInfoSupportsTableCorrelationNames = SqlInfo(pb.SqlInfo_SQL_SUPPORTS_TABLE_CORRELATION_NAMES) + + // Retrieves a boolean value indicating whether, when table correlation names are supported, + // they are restricted to being different from the names of the tables. + // + // Returns: + // - false: if different table correlation names are unsupported; + // - true: if different table correlation names are supported + SqlInfoSupportsDifferentTableCorrelationNames = SqlInfo(pb.SqlInfo_SQL_SUPPORTS_DIFFERENT_TABLE_CORRELATION_NAMES) + + // Retrieves a boolean value indicating whether expressions in ORDER BY lists are supported. 
+ // + // Returns: + // - false: if expressions in ORDER BY are unsupported; + // - true: if expressions in ORDER BY are supported; + SqlInfoSupportsExpressionsInOrderBy = SqlInfo(pb.SqlInfo_SQL_SUPPORTS_EXPRESSIONS_IN_ORDER_BY) + + // Retrieves a boolean value indicating whether using a column that is not in the SELECT statement in a GROUP BY + // clause is supported. + // + // Returns: + // - false: if using a column that is not in the SELECT statement in a GROUP BY clause is unsupported; + // - true: if using a column that is not in the SELECT statement in a GROUP BY clause is supported. + SqlInfoSupportsOrderByUnrelated = SqlInfo(pb.SqlInfo_SQL_SUPPORTS_ORDER_BY_UNRELATED) + + // Retrieves the supported GROUP BY commands; + // + // Returns an int32 bitmask value representing the supported commands. + // The returned bitmask should be parsed in order to retrieve the supported commands. + // + // For instance: + // - return 0 (\b0) => [] (GROUP BY is unsupported); + // - return 1 (\b1) => [SQL_GROUP_BY_UNRELATED]; + // - return 2 (\b10) => [SQL_GROUP_BY_BEYOND_SELECT]; + // - return 3 (\b11) => [SQL_GROUP_BY_UNRELATED, SQL_GROUP_BY_BEYOND_SELECT]. + // Valid GROUP BY types are described under `arrow.flight.protocol.sql.SqlSupportedGroupBy`. + SqlInfoSupportedGroupBy = SqlInfo(pb.SqlInfo_SQL_SUPPORTED_GROUP_BY) + + // Retrieves a boolean value indicating whether specifying a LIKE escape clause is supported. + // + // Returns: + // - false: if specifying a LIKE escape clause is unsupported; + // - true: if specifying a LIKE escape clause is supported. + SqlInfoSupportsLikeEscapeClause = SqlInfo(pb.SqlInfo_SQL_SUPPORTS_LIKE_ESCAPE_CLAUSE) + + // Retrieves a boolean value indicating whether columns may be defined as non-nullable. + // + // Returns: + // - false: if columns cannot be defined as non-nullable; + // - true: if columns may be defined as non-nullable. + SqlInfoSupportsNonNullableColumns = SqlInfo(pb.SqlInfo_SQL_SUPPORTS_NON_NULLABLE_COLUMNS) + + // Retrieves the supported SQL grammar level as per the ODBC specification. + // + // Returns an int32 bitmask value representing the supported SQL grammar level. + // The returned bitmask should be parsed in order to retrieve the supported grammar levels. + // + // For instance: + // - return 0 (\b0) => [] (SQL grammar is unsupported); + // - return 1 (\b1) => [SQL_MINIMUM_GRAMMAR]; + // - return 2 (\b10) => [SQL_CORE_GRAMMAR]; + // - return 3 (\b11) => [SQL_MINIMUM_GRAMMAR, SQL_CORE_GRAMMAR]; + // - return 4 (\b100) => [SQL_EXTENDED_GRAMMAR]; + // - return 5 (\b101) => [SQL_MINIMUM_GRAMMAR, SQL_EXTENDED_GRAMMAR]; + // - return 6 (\b110) => [SQL_CORE_GRAMMAR, SQL_EXTENDED_GRAMMAR]; + // - return 7 (\b111) => [SQL_MINIMUM_GRAMMAR, SQL_CORE_GRAMMAR, SQL_EXTENDED_GRAMMAR]. + // Valid SQL grammar levels are described under `arrow.flight.protocol.sql.SupportedSqlGrammar`. + SqlInfoSupportedGrammar = SqlInfo(pb.SqlInfo_SQL_SUPPORTED_GRAMMAR) + + // Retrieves the supported ANSI92 SQL grammar level. + // + // Returns an int32 bitmask value representing the supported ANSI92 SQL grammar level. + // The returned bitmask should be parsed in order to retrieve the supported commands. 
+ // + // For instance: + // - return 0 (\b0) => [] (ANSI92 SQL grammar is unsupported); + // - return 1 (\b1) => [ANSI92_ENTRY_SQL]; + // - return 2 (\b10) => [ANSI92_INTERMEDIATE_SQL]; + // - return 3 (\b11) => [ANSI92_ENTRY_SQL, ANSI92_INTERMEDIATE_SQL]; + // - return 4 (\b100) => [ANSI92_FULL_SQL]; + // - return 5 (\b101) => [ANSI92_ENTRY_SQL, ANSI92_FULL_SQL]; + // - return 6 (\b110) => [ANSI92_INTERMEDIATE_SQL, ANSI92_FULL_SQL]; + // - return 7 (\b111) => [ANSI92_ENTRY_SQL, ANSI92_INTERMEDIATE_SQL, ANSI92_FULL_SQL]. + // Valid ANSI92 SQL grammar levels are described under `arrow.flight.protocol.sql.SupportedAnsi92SqlGrammarLevel`. + SqlInfoANSI92SupportedLevel = SqlInfo(pb.SqlInfo_SQL_ANSI92_SUPPORTED_LEVEL) + + // Retrieves a boolean value indicating whether the SQL Integrity Enhancement Facility is supported. + // + // Returns: + // - false: if the SQL Integrity Enhancement Facility is unsupported; + // - true: if the SQL Integrity Enhancement Facility is supported. + SqlInfoSupportsIntegrityEnhancementFacility = SqlInfo(pb.SqlInfo_SQL_SUPPORTS_INTEGRITY_ENHANCEMENT_FACILITY) + + // Retrieves the support level for SQL OUTER JOINs. + // + // Returns a int32 ordinal for the SQL OUTER JOIN support level, as described in + // `arrow.flight.protocol.sql.SqlOuterJoinsSupportLevel`. + SqlInfoOuterJoinsSupportLevel = SqlInfo(pb.SqlInfo_SQL_OUTER_JOINS_SUPPORT_LEVEL) + + // Retrieves a UTF-8 string with the preferred term for "schema". + SqlInfoSchemaTerm = SqlInfo(pb.SqlInfo_SQL_SCHEMA_TERM) + // Retrieves a UTF-8 string with the preferred term for "procedure". + SqlInfoProcedureTerm = SqlInfo(pb.SqlInfo_SQL_PROCEDURE_TERM) + + // Retrieves a UTF-8 string with the preferred term for "catalog". + // If an empty string is returned, it is assumed that the server does NOT support catalogs. + SqlInfoCatalogTerm = SqlInfo(pb.SqlInfo_SQL_CATALOG_TERM) + + // Retrieves a boolean value indicating whether a catalog appears at the start of a fully qualified table name. + // + // - false: if a catalog does not appear at the start of a fully qualified table name; + // - true: if a catalog appears at the start of a fully qualified table name. + SqlInfoCatalogAtStart = SqlInfo(pb.SqlInfo_SQL_CATALOG_AT_START) + + // Retrieves the supported actions for a SQL schema. + // + // Returns an int32 bitmask value representing the supported actions for a SQL schema. + // The returned bitmask should be parsed in order to retrieve the supported actions for a SQL schema. + // + // For instance: + // - return 0 (\b0) => [] (no supported actions for SQL schema); + // - return 1 (\b1) => [SQL_ELEMENT_IN_PROCEDURE_CALLS]; + // - return 2 (\b10) => [SQL_ELEMENT_IN_INDEX_DEFINITIONS]; + // - return 3 (\b11) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS]; + // - return 4 (\b100) => [SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]; + // - return 5 (\b101) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]; + // - return 6 (\b110) => [SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]; + // - return 7 (\b111) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]. + // Valid actions for a SQL schema are described under `arrow.flight.protocol.sql.SqlSupportedElementActions`. + SqlInfoSchemasSupportedActions = SqlInfo(pb.SqlInfo_SQL_SCHEMAS_SUPPORTED_ACTIONS) + + // Retrieves the supported actions for a SQL catalog. + // + // Returns an int32 bitmask value representing the supported actions for a SQL catalog.
+ // The returned bitmask should be parsed in order to retrieve the supported actions for a SQL catalog. + // + // For instance: + // - return 0 (\b0) => [] (no supported actions for SQL catalog); + // - return 1 (\b1) => [SQL_ELEMENT_IN_PROCEDURE_CALLS]; + // - return 2 (\b10) => [SQL_ELEMENT_IN_INDEX_DEFINITIONS]; + // - return 3 (\b11) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS]; + // - return 4 (\b100) => [SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]; + // - return 5 (\b101) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]; + // - return 6 (\b110) => [SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]; + // - return 7 (\b111) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]. + // Valid actions for a SQL catalog are described under `arrow.flight.protocol.sql.SqlSupportedElementActions`. + SqlInfoCatalogsSupportedActions = SqlInfo(pb.SqlInfo_SQL_CATALOGS_SUPPORTED_ACTIONS) + + // Retrieves the supported SQL positioned commands. + // + // Returns an int32 bitmask value representing the supported SQL positioned commands. + // The returned bitmask should be parsed in order to retrieve the supported SQL positioned commands. + // + // For instance: + // - return 0 (\b0) => [] (no supported SQL positioned commands); + // - return 1 (\b1) => [SQL_POSITIONED_DELETE]; + // - return 2 (\b10) => [SQL_POSITIONED_UPDATE]; + // - return 3 (\b11) => [SQL_POSITIONED_DELETE, SQL_POSITIONED_UPDATE]. + // Valid SQL positioned commands are described under `arrow.flight.protocol.sql.SqlSupportedPositionedCommands`. + SqlInfoSupportedPositionedCommands = SqlInfo(pb.SqlInfo_SQL_SUPPORTED_POSITIONED_COMMANDS) + + // Retrieves a boolean value indicating whether SELECT FOR UPDATE statements are supported. + // + // Returns: + // - false: if SELECT FOR UPDATE statements are unsupported; + // - true: if SELECT FOR UPDATE statements are supported. + SqlInfoSelectForUpdateSupported = SqlInfo(pb.SqlInfo_SQL_SELECT_FOR_UPDATE_SUPPORTED) + + // Retrieves a boolean value indicating whether stored procedure calls that use the stored procedure escape syntax + // are supported. + // + // Returns: + // - false: if stored procedure calls that use the stored procedure escape syntax are unsupported; + // - true: if stored procedure calls that use the stored procedure escape syntax are supported. + SqlInfoStoredProceduresSupported = SqlInfo(pb.SqlInfo_SQL_STORED_PROCEDURES_SUPPORTED) + + // Retrieves the supported SQL subqueries. + // + // Returns an int32 bitmask value representing the supported SQL subqueries. + // The returned bitmask should be parsed in order to retrieve the supported SQL subqueries. 
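Values like this one are bitmasks in which bit i stands for the i-th option of the corresponding enum. A minimal editorial sketch of expanding such a mask (the expandBitmask name is invented for illustration); the enumerated cases that follow show the same expansion spelled out by hand.

package flightsqlexample

// expandBitmask (hypothetical helper) returns the ordinals whose bits are set
// in a bitmask-style SqlInfo result, e.g. expandBitmask(3) yields [0 1].
func expandBitmask(mask int32) []int32 {
	var ordinals []int32
	for i := int32(0); mask != 0; i, mask = i+1, mask>>1 {
		if mask&1 != 0 {
			ordinals = append(ordinals, i)
		}
	}
	return ordinals
}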
+ // + // For instance: + // - return 0 (\b0) => [] (no supported SQL subqueries); + // - return 1 (\b1) => [SQL_SUBQUERIES_IN_COMPARISONS]; + // - return 2 (\b10) => [SQL_SUBQUERIES_IN_EXISTS]; + // - return 3 (\b11) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_EXISTS]; + // - return 4 (\b100) => [SQL_SUBQUERIES_IN_INS]; + // - return 5 (\b101) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_INS]; + // - return 6 (\b110) => [SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_EXISTS]; + // - return 7 (\b111) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_INS]; + // - return 8 (\b1000) => [SQL_SUBQUERIES_IN_QUANTIFIEDS]; + // - return 9 (\b1001) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + // - return 10 (\b1010) => [SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + // - return 11 (\b1011) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + // - return 12 (\b1100) => [SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + // - return 13 (\b1101) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + // - return 14 (\b1110) => [SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + // - return 15 (\b1111) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + // - ... + // Valid SQL subqueries are described under `arrow.flight.protocol.sql.SqlSupportedSubqueries`. + SqlInfoSupportedSubqueries = SqlInfo(pb.SqlInfo_SQL_SUPPORTED_SUBQUERIES) + + // Retrieves a boolean value indicating whether correlated subqueries are supported. + // + // Returns: + // - false: if correlated subqueries are unsupported; + // - true: if correlated subqueries are supported. + SqlInfoCorrelatedSubqueriesSupported = SqlInfo(pb.SqlInfo_SQL_CORRELATED_SUBQUERIES_SUPPORTED) + + // Retrieves the supported SQL UNIONs. + // + // Returns an int32 bitmask value representing the supported SQL UNIONs. + // The returned bitmask should be parsed in order to retrieve the supported SQL UNIONs. + // + // For instance: + // - return 0 (\b0) => [] (no supported SQL UNIONs); + // - return 1 (\b1) => [SQL_UNION]; + // - return 2 (\b10) => [SQL_UNION_ALL]; + // - return 3 (\b11) => [SQL_UNION, SQL_UNION_ALL]. + // Valid SQL UNIONs are described under `arrow.flight.protocol.sql.SqlSupportedUnions`. + SqlInfoSupportedUnions = SqlInfo(pb.SqlInfo_SQL_SUPPORTED_UNIONS) + + // Retrieves a int64 value representing the maximum number of hex characters allowed in an inline binary literal. + SqlInfoMaxBinaryLiteralLen = SqlInfo(pb.SqlInfo_SQL_MAX_BINARY_LITERAL_LENGTH) + // Retrieves a int64 value representing the maximum number of characters allowed for a character literal. + SqlInfoMaxCharLiteralLen = SqlInfo(pb.SqlInfo_SQL_MAX_CHAR_LITERAL_LENGTH) + // Retrieves a int64 value representing the maximum number of characters allowed for a column name. + SqlInfoMaxColumnNameLen = SqlInfo(pb.SqlInfo_SQL_MAX_COLUMN_NAME_LENGTH) + // Retrieves a int64 value representing the maximum number of columns allowed in a GROUP BY clause. + SqlInfoMaxColumnsInGroupBy = SqlInfo(pb.SqlInfo_SQL_MAX_COLUMNS_IN_GROUP_BY) + // Retrieves a int64 value representing the maximum number of columns allowed in an index. + SqlInfoMaxColumnsInIndex = SqlInfo(pb.SqlInfo_SQL_MAX_COLUMNS_IN_INDEX) + // Retrieves a int64 value representing the maximum number of columns allowed in an ORDER BY clause.
+ SqlInfoMaxColumnsInOrderBy = SqlInfo(pb.SqlInfo_SQL_MAX_COLUMNS_IN_ORDER_BY) + // Retrieves a int64 value representing the maximum number of columns allowed in a SELECT list. + SqlInfoMaxColumnsInSelect = SqlInfo(pb.SqlInfo_SQL_MAX_COLUMNS_IN_SELECT) + // Retrieves a int64 value representing the maximum number of columns allowed in a table. + SqlInfoMaxColumnsInTable = SqlInfo(pb.SqlInfo_SQL_MAX_COLUMNS_IN_TABLE) + // Retrieves a int64 value representing the maximum number of concurrent connections possible. + SqlInfoMaxConnections = SqlInfo(pb.SqlInfo_SQL_MAX_CONNECTIONS) + // Retrieves a int64 value representing the maximum number of characters allowed in a cursor name. + SqlInfoMaxCursorNameLen = SqlInfo(pb.SqlInfo_SQL_MAX_CURSOR_NAME_LENGTH) + + // Retrieves a int64 value representing the maximum number of bytes allowed for an index, + // including all of the parts of the index. + SqlInfoMaxIndexLen = SqlInfo(pb.SqlInfo_SQL_MAX_INDEX_LENGTH) + // Retrieves a int64 value representing the maximum number of characters allowed in a schema name. + SqlInfoDBSchemaNameLen = SqlInfo(pb.SqlInfo_SQL_DB_SCHEMA_NAME_LENGTH) + // Retrieves a int64 value representing the maximum number of characters allowed in a procedure name. + SqlInfoMaxProcedureNameLen = SqlInfo(pb.SqlInfo_SQL_MAX_PROCEDURE_NAME_LENGTH) + // Retrieves a int64 value representing the maximum number of characters allowed in a catalog name. + SqlInfoMaxCatalogNameLen = SqlInfo(pb.SqlInfo_SQL_MAX_CATALOG_NAME_LENGTH) + // Retrieves a int64 value representing the maximum number of bytes allowed in a single row. + SqlInfoMaxRowSize = SqlInfo(pb.SqlInfo_SQL_MAX_ROW_SIZE) + + // Retrieves a boolean indicating whether the return value for the JDBC method getMaxRowSize includes the SQL + // data types LONGVARCHAR and LONGVARBINARY. + // + // Returns: + // - false: if return value for the JDBC method getMaxRowSize does + // not include the SQL data types LONGVARCHAR and LONGVARBINARY; + // - true: if return value for the JDBC method getMaxRowSize includes + // the SQL data types LONGVARCHAR and LONGVARBINARY. + SqlInfoMaxRowSizeIncludesBlobs = SqlInfo(pb.SqlInfo_SQL_MAX_ROW_SIZE_INCLUDES_BLOBS) + + // Retrieves a int64 value representing the maximum number of characters allowed for an SQL statement; + // a result of 0 (zero) means that there is no limit or the limit is not known. + SqlInfoMaxStatementLen = SqlInfo(pb.SqlInfo_SQL_MAX_STATEMENT_LENGTH) + // Retrieves a int64 value representing the maximum number of active statements that can be open at the same time. + SqlInfoMaxStatements = SqlInfo(pb.SqlInfo_SQL_MAX_STATEMENTS) + // Retrieves a int64 value representing the maximum number of characters allowed in a table name. + SqlInfoMaxTableNameLen = SqlInfo(pb.SqlInfo_SQL_MAX_TABLE_NAME_LENGTH) + // Retrieves a int64 value representing the maximum number of tables allowed in a SELECT statement. + SqlInfoMaxTablesInSelect = SqlInfo(pb.SqlInfo_SQL_MAX_TABLES_IN_SELECT) + // Retrieves a int64 value representing the maximum number of characters allowed in a user name. + SqlInfoMaxUsernameLen = SqlInfo(pb.SqlInfo_SQL_MAX_USERNAME_LENGTH) + + // Retrieves this database's default transaction isolation level as described in + // `arrow.flight.protocol.sql.SqlTransactionIsolationLevel`. + // + // Returns a int32 ordinal for the SQL transaction isolation level. + SqlInfoDefaultTransactionIsolation = SqlInfo(pb.SqlInfo_SQL_DEFAULT_TRANSACTION_ISOLATION) + + // Retrieves a boolean value indicating whether transactions are supported.
If not, invoking the method commit is a + // noop, and the isolation level is `arrow.flight.protocol.sql.SqlTransactionIsolationLevel.TRANSACTION_NONE`. + // + // Returns: + // - false: if transactions are unsupported; + // - true: if transactions are supported. + SqlInfoTransactionsSupported = SqlInfo(pb.SqlInfo_SQL_TRANSACTIONS_SUPPORTED) + + // Retrieves the supported transactions isolation levels. + // + // Returns an int32 bitmask value representing the supported transactions isolation levels. + // The returned bitmask should be parsed in order to retrieve the supported transactions isolation levels. + // + // For instance: + // - return 0 (\b0) => [] (no supported SQL transactions isolation levels); + // - return 1 (\b1) => [SQL_TRANSACTION_NONE]; + // - return 2 (\b10) => [SQL_TRANSACTION_READ_UNCOMMITTED]; + // - return 3 (\b11) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_UNCOMMITTED]; + // - return 4 (\b100) => [SQL_TRANSACTION_READ_COMMITTED]; + // - return 5 (\b101) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_COMMITTED]; + // - return 6 (\b110) => [SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_READ_COMMITTED]; + // - return 7 (\b111) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_READ_COMMITTED]; + // - return 8 (\b1000) => [SQL_TRANSACTION_REPEATABLE_READ]; + // - return 9 (\b1001) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_REPEATABLE_READ]; + // - return 10 (\b1010) => [SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ]; + // - return 11 (\b1011) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ]; + // - return 12 (\b1100) => [SQL_TRANSACTION_READ_COMMITTED, SQL_TRANSACTION_REPEATABLE_READ]; + // - return 13 (\b1101) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_COMMITTED, SQL_TRANSACTION_REPEATABLE_READ]; + // - return 14 (\b1110) => [SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_READ_COMMITTED, SQL_TRANSACTION_REPEATABLE_READ]; + // - return 15 (\b1111) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_READ_COMMITTED, SQL_TRANSACTION_REPEATABLE_READ]; + // - return 16 (\b10000) => [SQL_TRANSACTION_SERIALIZABLE]; + // - ... + // Valid SQL transaction isolation levels are described under `arrow.flight.protocol.sql.SqlTransactionIsolationLevel`. + SqlInfoSupportedTransactionsIsolationlevels = SqlInfo(pb.SqlInfo_SQL_SUPPORTED_TRANSACTIONS_ISOLATION_LEVELS) + + // Retrieves a boolean value indicating whether a data definition statement within a transaction forces + // the transaction to commit. + // + // Returns: + // - false: if a data definition statement within a transaction does not force the transaction to commit; + // - true: if a data definition statement within a transaction forces the transaction to commit. + SqlInfoDataDefinitionCausesTransactionCommit = SqlInfo(pb.SqlInfo_SQL_DATA_DEFINITION_CAUSES_TRANSACTION_COMMIT) + + // Retrieves a boolean value indicating whether a data definition statement within a transaction is ignored. + // + // Returns: + // - false: if a data definition statement within a transaction is taken into account; + // - true: if a data definition statement within a transaction is ignored. + SqlInfoDataDefinitionsInTransactionsIgnored = SqlInfo(pb.SqlInfo_SQL_DATA_DEFINITIONS_IN_TRANSACTIONS_IGNORED) + + // Retrieves an int32 bitmask value representing the supported result set types. + // The returned bitmask should be parsed in order to retrieve the supported result set types.
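Whichever of these ids a server actually populates, the client retrieves them the same way as TestCommandGetSqlInfo earlier in this diff. A hedged sketch; the serverMetadata helper name and the *flightsql.Client type are assumptions, while GetSqlInfo, DoGet, and the reader methods are used exactly as in the tests.

package flightsqlexample

import (
	"context"

	"github.com/apache/arrow/go/v10/arrow/flight/flightsql"
)

// serverMetadata (hypothetical helper) fetches a few SqlInfo values and streams
// back the two-column result (uint32 info id, dense-union value), following the
// same GetSqlInfo -> DoGet flow as the sqlite server tests above.
func serverMetadata(ctx context.Context, cl *flightsql.Client) error {
	info, err := cl.GetSqlInfo(ctx, []flightsql.SqlInfo{
		flightsql.SqlInfoFlightSqlServerName,
		flightsql.SqlInfoFlightSqlServerReadOnly,
		flightsql.SqlInfoSupportedTransactionsIsolationlevels,
	})
	if err != nil {
		return err
	}
	rdr, err := cl.DoGet(ctx, info.Endpoint[0].Ticket)
	if err != nil {
		return err
	}
	defer rdr.Release()
	for rdr.Next() {
		rec := rdr.Record()
		_ = rec // column 0 holds the ids, column 1 the union of result values
	}
	return rdr.Err()
}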
+ // + // For instance: + // - return 0 (\b0) => [] (no supported result set types); + // - return 1 (\b1) => [SQL_RESULT_SET_TYPE_UNSPECIFIED]; + // - return 2 (\b10) => [SQL_RESULT_SET_TYPE_FORWARD_ONLY]; + // - return 3 (\b11) => [SQL_RESULT_SET_TYPE_UNSPECIFIED, SQL_RESULT_SET_TYPE_FORWARD_ONLY]; + // - return 4 (\b100) => [SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE]; + // - return 5 (\b101) => [SQL_RESULT_SET_TYPE_UNSPECIFIED, SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE]; + // - return 6 (\b110) => [SQL_RESULT_SET_TYPE_FORWARD_ONLY, SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE]; + // - return 7 (\b111) => [SQL_RESULT_SET_TYPE_UNSPECIFIED, SQL_RESULT_SET_TYPE_FORWARD_ONLY, SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE]; + // - return 8 (\b1000) => [SQL_RESULT_SET_TYPE_SCROLL_SENSITIVE]; + // - ... + // Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetType`. + SqlInfoSupportedResultSetTypes = SqlInfo(pb.SqlInfo_SQL_SUPPORTED_RESULT_SET_TYPES) + + // Returns an int32 bitmask value concurrency types supported for + // `arrow.flight.protocol.sql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_UNSPECIFIED`. + // + // For instance: + // - return 0 (\b0) => [] (no supported concurrency types for this result set type) + // - return 1 (\b1) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED] + // - return 2 (\b10) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY] + // - return 3 (\b11) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY] + // - return 4 (\b100) => [SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 5 (\b101) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 6 (\b110) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 7 (\b111) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency`. + SqlInfoSupportedConcurrenciesForResultSetUnspecified = SqlInfo(pb.SqlInfo_SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_UNSPECIFIED) + + // Returns an int32 bitmask value concurrency types supported for + // `arrow.flight.protocol.sql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_FORWARD_ONLY`. + // + // For instance: + // - return 0 (\b0) => [] (no supported concurrency types for this result set type) + // - return 1 (\b1) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED] + // - return 2 (\b10) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY] + // - return 3 (\b11) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY] + // - return 4 (\b100) => [SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 5 (\b101) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 6 (\b110) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 7 (\b111) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency`. + SqlInfoSupportedConcurrenciesForResultSetForwardOnly = SqlInfo(pb.SqlInfo_SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_FORWARD_ONLY) + + // Returns an int32 bitmask value concurrency types supported for + // `arrow.flight.protocol.sql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_SCROLL_SENSITIVE`. 
+ // + // For instance: + // - return 0 (\b0) => [] (no supported concurrency types for this result set type) + // - return 1 (\b1) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED] + // - return 2 (\b10) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY] + // - return 3 (\b11) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY] + // - return 4 (\b100) => [SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 5 (\b101) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 6 (\b110) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 7 (\b111) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency`. + SqlInfoSupportedConcurrenciesForResultSetScrollSensitive = SqlInfo(pb.SqlInfo_SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_SCROLL_SENSITIVE) + + // Returns an int32 bitmask value concurrency types supported for + // `arrow.flight.protocol.sql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE`. + // + // For instance: + // - return 0 (\b0) => [] (no supported concurrency types for this result set type) + // - return 1 (\b1) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED] + // - return 2 (\b10) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY] + // - return 3 (\b11) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY] + // - return 4 (\b100) => [SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 5 (\b101) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 6 (\b110) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 7 (\b111) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency`. + SqlInfoSupportedConcurrenciesForResultSetScrollInensitive = SqlInfo(pb.SqlInfo_SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_SCROLL_INSENSITIVE) + + // Retrieves a boolean value indicating whether this database supports batch updates. + // + // - false: if this database does not support batch updates; + // - true: if this database supports batch updates. + SqlInfoBatchUpdatesSupported = SqlInfo(pb.SqlInfo_SQL_BATCH_UPDATES_SUPPORTED) + + // Retrieves a boolean value indicating whether this database supports savepoints. + // + // Returns: + // - false: if this database does not support savepoints; + // - true: if this database supports savepoints. + SqlInfoSavePointsSupported = SqlInfo(pb.SqlInfo_SQL_SAVEPOINTS_SUPPORTED) + + // Retrieves a boolean value indicating whether named parameters are supported in callable statements. + // + // Returns: + // - false: if named parameters in callable statements are unsupported; + // - true: if named parameters in callable statements are supported. + SqlInfoNamedParametersSupported = SqlInfo(pb.SqlInfo_SQL_NAMED_PARAMETERS_SUPPORTED) + + // Retrieves a boolean value indicating whether updates made to a LOB are made on a copy or directly to the LOB. + // + // Returns: + // - false: if updates made to a LOB are made directly to the LOB; + // - true: if updates made to a LOB are made on a copy. 
+ SqlInfoLocatorsUpdateCopy = SqlInfo(pb.SqlInfo_SQL_LOCATORS_UPDATE_COPY) + + // Retrieves a boolean value indicating whether invoking user-defined or vendor functions + // using the stored procedure escape syntax is supported. + // + // Returns: + // - false: if invoking user-defined or vendor functions using the stored procedure escape syntax is unsupported; + // - true: if invoking user-defined or vendor functions using the stored procedure escape syntax is supported. + SqlInfoStoredFunctionsUsingCallSyntaxSupported = SqlInfo(pb.SqlInfo_SQL_STORED_FUNCTIONS_USING_CALL_SYNTAX_SUPPORTED) +) + +func (s SqlInfo) String() string { return pb.SqlInfo(int32(s)).String() } + +// SqlSupportedCaseSensitivity indicates whether something +// (e.g. an identifier) is case-sensitive +// +// duplicated from protobuf to avoid relying directly on the protobuf +// generated code, also making them shorter and easier to use +type SqlSupportedCaseSensitivity = pb.SqlSupportedCaseSensitivity + +const ( + SqlCaseSensitivityUnknown = pb.SqlSupportedCaseSensitivity_SQL_CASE_SENSITIVITY_UNKNOWN + SqlCaseSensitivityCaseInsensitive = pb.SqlSupportedCaseSensitivity_SQL_CASE_SENSITIVITY_CASE_INSENSITIVE + SqlCaseSensitivityUpperCase = pb.SqlSupportedCaseSensitivity_SQL_CASE_SENSITIVITY_UPPERCASE + SqlCaseSensitivityLowerCase = pb.SqlSupportedCaseSensitivity_SQL_CASE_SENSITIVITY_LOWERCASE +) + +// SqlNullOrdering indicates how nulls are sorted +// +// duplicated from protobuf to avoid relying directly on the protobuf +// generated code, also making them shorter and easier to use +type SqlNullOrdering = pb.SqlNullOrdering + +const ( + SqlNullOrderingSortHigh = pb.SqlNullOrdering_SQL_NULLS_SORTED_HIGH + SqlNullOrderingSortLow = pb.SqlNullOrdering_SQL_NULLS_SORTED_LOW + SqlNullOrderingSortAtStart = pb.SqlNullOrdering_SQL_NULLS_SORTED_AT_START + SqlNullOrderingSortAtEnd = pb.SqlNullOrdering_SQL_NULLS_SORTED_AT_END +) + +// SqlSupportsConvert indicates support for converting between different +// types. 
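Together with the SqlInfo ids above and the conversion constants defined just below, a server describes itself by filling a SqlInfoResultMap. A hedged, hand-rolled sketch loosely mirroring what example.SqlInfoResultMap() supplies for the test server; every concrete value here is invented for illustration, and the value kinds match the ones validateSqlInfo checks (string, bool, int64, []string, map[int32][]int32).

package flightsqlexample

import "github.com/apache/arrow/go/v10/arrow/flight/flightsql"

// exampleInfo is a hypothetical set of static server metadata keyed by SqlInfo
// id; how a server wires such a map into its GetSqlInfo handling is outside
// this excerpt.
var exampleInfo = flightsql.SqlInfoResultMap{
	uint32(flightsql.SqlInfoFlightSqlServerName):     "example-server",
	uint32(flightsql.SqlInfoFlightSqlServerReadOnly): false,
	uint32(flightsql.SqlInfoMaxColumnsInTable):       int64(2000),
	uint32(flightsql.SqlInfoKeywords):                []string{"SELECT", "INSERT", "UPDATE"},
	uint32(flightsql.SqlInfoSupportsConvert): map[int32][]int32{
		int32(flightsql.SqlConvertBigInt): {int32(flightsql.SqlConvertInteger)},
	},
}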
+// +// duplicated from protobuf to avoid relying directly on the protobuf +// generated code, also making them shorter and easier to use +type SqlSupportsConvert = pb.SqlSupportsConvert + +const ( + SqlConvertBigInt = pb.SqlSupportsConvert_SQL_CONVERT_BIGINT + SqlConvertBinary = pb.SqlSupportsConvert_SQL_CONVERT_BINARY + SqlConvertBit = pb.SqlSupportsConvert_SQL_CONVERT_BIT + SqlConvertChar = pb.SqlSupportsConvert_SQL_CONVERT_CHAR + SqlConvertDate = pb.SqlSupportsConvert_SQL_CONVERT_DATE + SqlConvertDecimal = pb.SqlSupportsConvert_SQL_CONVERT_DECIMAL + SqlConvertFloat = pb.SqlSupportsConvert_SQL_CONVERT_FLOAT + SqlConvertInteger = pb.SqlSupportsConvert_SQL_CONVERT_INTEGER + SqlConvertIntervalDayTime = pb.SqlSupportsConvert_SQL_CONVERT_INTERVAL_DAY_TIME + SqlConvertIntervalYearMonth = pb.SqlSupportsConvert_SQL_CONVERT_INTERVAL_YEAR_MONTH + SqlConvertLongVarbinary = pb.SqlSupportsConvert_SQL_CONVERT_LONGVARBINARY + SqlConvertLongVarchar = pb.SqlSupportsConvert_SQL_CONVERT_LONGVARCHAR + SqlConvertNumeric = pb.SqlSupportsConvert_SQL_CONVERT_NUMERIC + SqlConvertReal = pb.SqlSupportsConvert_SQL_CONVERT_REAL + SqlConvertSmallInt = pb.SqlSupportsConvert_SQL_CONVERT_SMALLINT + SqlConvertTime = pb.SqlSupportsConvert_SQL_CONVERT_TIME + SqlConvertTimestamp = pb.SqlSupportsConvert_SQL_CONVERT_TIMESTAMP + SqlConvertTinyInt = pb.SqlSupportsConvert_SQL_CONVERT_TINYINT + SqlConvertVarbinary = pb.SqlSupportsConvert_SQL_CONVERT_VARBINARY + SqlConvertVarchar = pb.SqlSupportsConvert_SQL_CONVERT_VARCHAR +) diff --git a/go/arrow/flight/gen.go b/go/arrow/flight/gen.go index be55119c7fd20..4109059af8ed1 100644 --- a/go/arrow/flight/gen.go +++ b/go/arrow/flight/gen.go @@ -17,3 +17,4 @@ package flight //go:generate protoc -I../../../format --go_out=./internal/flight --go-grpc_out=./internal/flight --go_opt=paths=source_relative --go-grpc_opt=paths=source_relative Flight.proto +//go:generate protoc --experimental_allow_proto3_optional -I../../../format --go_out=./internal/flight --go-grpc_out=./internal/flight --go_opt=paths=source_relative --go-grpc_opt=paths=source_relative FlightSql.proto diff --git a/go/arrow/flight/internal/flight/Flight.pb.go b/go/arrow/flight/internal/flight/Flight.pb.go index 18ba75bb66ec6..b7be492acd424 100644 --- a/go/arrow/flight/internal/flight/Flight.pb.go +++ b/go/arrow/flight/internal/flight/Flight.pb.go @@ -17,7 +17,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.27.1 +// protoc-gen-go v1.28.1 // protoc v3.12.4 // source: Flight.proto @@ -666,8 +666,15 @@ type FlightInfo struct { // The descriptor associated with this info. FlightDescriptor *FlightDescriptor `protobuf:"bytes,2,opt,name=flight_descriptor,json=flightDescriptor,proto3" json:"flight_descriptor,omitempty"` // - // A list of endpoints associated with the flight. To consume the whole - // flight, all endpoints must be consumed. + // A list of endpoints associated with the flight. To consume the + // whole flight, all endpoints (and hence all Tickets) must be + // consumed. Endpoints can be consumed in any order. + // + // In other words, an application can use multiple endpoints to + // represent partitioned data. + // + // There is no ordering defined on endpoints. Hence, if the returned + // data has an ordering, it should be returned in a single endpoint. Endpoint []*FlightEndpoint `protobuf:"bytes,3,rep,name=endpoint,proto3" json:"endpoint,omitempty"` // Set these to -1 if unknown. 
TotalRecords int64 `protobuf:"varint,4,opt,name=total_records,json=totalRecords,proto3" json:"total_records,omitempty"` @@ -752,9 +759,20 @@ type FlightEndpoint struct { // Token used to retrieve this stream. Ticket *Ticket `protobuf:"bytes,1,opt,name=ticket,proto3" json:"ticket,omitempty"` // - // A list of URIs where this ticket can be redeemed. If the list is - // empty, the expectation is that the ticket can only be redeemed on the - // current service where the ticket was generated. + // A list of URIs where this ticket can be redeemed via DoGet(). + // + // If the list is empty, the expectation is that the ticket can only + // be redeemed on the current service where the ticket was + // generated. + // + // If the list is not empty, the expectation is that the ticket can + // be redeemed at any of the locations, and that the data returned + // will be equivalent. In this case, the ticket may only be redeemed + // at one of the given locations, and not (necessarily) on the + // current service. + // + // In other words, an application can use multiple locations to + // represent redundant and/or load balanced services. Location []*Location `protobuf:"bytes,2,rep,name=location,proto3" json:"location,omitempty"` } @@ -857,6 +875,9 @@ func (x *Location) GetUri() string { // // An opaque identifier that the service can use to retrieve a particular // portion of a stream. +// +// Tickets are meant to be single use. It is an error/application-defined +// behavior to reuse a ticket. type Ticket struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1180,14 +1201,15 @@ var file_Flight_proto_rawDesc = []byte{ 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x45, 0x6d, 0x70, 0x74, 0x79, 0x1a, 0x21, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x41, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, - 0x65, 0x22, 0x00, 0x30, 0x01, 0x42, 0x67, 0x0a, 0x1c, 0x6f, 0x72, 0x67, 0x2e, 0x61, 0x70, 0x61, + 0x65, 0x22, 0x00, 0x30, 0x01, 0x42, 0x76, 0x0a, 0x1c, 0x6f, 0x72, 0x67, 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, - 0x2e, 0x69, 0x6d, 0x70, 0x6c, 0x5a, 0x28, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, + 0x2e, 0x69, 0x6d, 0x70, 0x6c, 0x5a, 0x37, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2f, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2f, 0x67, - 0x6f, 0x2f, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x3b, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0xaa, - 0x02, 0x1c, 0x41, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x41, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x46, - 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x62, 0x06, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x6f, 0x2f, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2f, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2f, 0x69, + 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x2f, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0xaa, 0x02, + 0x1c, 0x41, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x41, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x46, 0x6c, + 0x69, 0x67, 0x68, 0x74, 0x2e, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x62, 0x06, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/go/arrow/flight/internal/flight/FlightSql.pb.go b/go/arrow/flight/internal/flight/FlightSql.pb.go new file mode 100644 index 0000000000000..126d8539cf4c8 --- /dev/null +++ b/go/arrow/flight/internal/flight/FlightSql.pb.go @@ -0,0 +1,4395 @@ +// Licensed 
to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.28.1 +// protoc v3.12.4 +// source: FlightSql.proto + +package flight + +import ( + descriptor "google.golang.org/protobuf/types/descriptorpb" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// Options for CommandGetSqlInfo. +type SqlInfo int32 + +const ( + // Retrieves a UTF-8 string with the name of the Flight SQL Server. + SqlInfo_FLIGHT_SQL_SERVER_NAME SqlInfo = 0 + // Retrieves a UTF-8 string with the native version of the Flight SQL Server. + SqlInfo_FLIGHT_SQL_SERVER_VERSION SqlInfo = 1 + // Retrieves a UTF-8 string with the Arrow format version of the Flight SQL Server. + SqlInfo_FLIGHT_SQL_SERVER_ARROW_VERSION SqlInfo = 2 + // + // Retrieves a boolean value indicating whether the Flight SQL Server is read only. + // + // Returns: + // - false: if read-write + // - true: if read only + SqlInfo_FLIGHT_SQL_SERVER_READ_ONLY SqlInfo = 3 + // + // Retrieves a boolean value indicating whether the Flight SQL Server supports CREATE and DROP of catalogs. + // + // Returns: + // - false: if it doesn't support CREATE and DROP of catalogs. + // - true: if it supports CREATE and DROP of catalogs. + SqlInfo_SQL_DDL_CATALOG SqlInfo = 500 + // + // Retrieves a boolean value indicating whether the Flight SQL Server supports CREATE and DROP of schemas. + // + // Returns: + // - false: if it doesn't support CREATE and DROP of schemas. + // - true: if it supports CREATE and DROP of schemas. + SqlInfo_SQL_DDL_SCHEMA SqlInfo = 501 + // + // Indicates whether the Flight SQL Server supports CREATE and DROP of tables. + // + // Returns: + // - false: if it doesn't support CREATE and DROP of tables. + // - true: if it supports CREATE and DROP of tables. + SqlInfo_SQL_DDL_TABLE SqlInfo = 502 + // + // Retrieves a int32 ordinal representing the case sensitivity of catalog, table, schema and table names. + // + // The possible values are listed in `arrow.flight.protocol.sql.SqlSupportedCaseSensitivity`. + SqlInfo_SQL_IDENTIFIER_CASE SqlInfo = 503 + // Retrieves a UTF-8 string with the supported character(s) used to surround a delimited identifier. + SqlInfo_SQL_IDENTIFIER_QUOTE_CHAR SqlInfo = 504 + // + // Retrieves a int32 describing the case sensitivity of quoted identifiers. + // + // The possible values are listed in `arrow.flight.protocol.sql.SqlSupportedCaseSensitivity`. 
+ SqlInfo_SQL_QUOTED_IDENTIFIER_CASE SqlInfo = 505 + // + // Retrieves a boolean value indicating whether all tables are selectable. + // + // Returns: + // - false: if not all tables are selectable or if none are; + // - true: if all tables are selectable. + SqlInfo_SQL_ALL_TABLES_ARE_SELECTABLE SqlInfo = 506 + // + // Retrieves the null ordering. + // + // Returns a int32 ordinal for the null ordering being used, as described in + // `arrow.flight.protocol.sql.SqlNullOrdering`. + SqlInfo_SQL_NULL_ORDERING SqlInfo = 507 + // Retrieves a UTF-8 string list with values of the supported keywords. + SqlInfo_SQL_KEYWORDS SqlInfo = 508 + // Retrieves a UTF-8 string list with values of the supported numeric functions. + SqlInfo_SQL_NUMERIC_FUNCTIONS SqlInfo = 509 + // Retrieves a UTF-8 string list with values of the supported string functions. + SqlInfo_SQL_STRING_FUNCTIONS SqlInfo = 510 + // Retrieves a UTF-8 string list with values of the supported system functions. + SqlInfo_SQL_SYSTEM_FUNCTIONS SqlInfo = 511 + // Retrieves a UTF-8 string list with values of the supported datetime functions. + SqlInfo_SQL_DATETIME_FUNCTIONS SqlInfo = 512 + // + // Retrieves the UTF-8 string that can be used to escape wildcard characters. + // This is the string that can be used to escape '_' or '%' in the catalog search parameters that are a pattern + // (and therefore use one of the wildcard characters). + // The '_' character represents any single character; the '%' character represents any sequence of zero or more + // characters. + SqlInfo_SQL_SEARCH_STRING_ESCAPE SqlInfo = 513 + // + // Retrieves a UTF-8 string with all the "extra" characters that can be used in unquoted identifier names + // (those beyond a-z, A-Z, 0-9 and _). + SqlInfo_SQL_EXTRA_NAME_CHARACTERS SqlInfo = 514 + // + // Retrieves a boolean value indicating whether column aliasing is supported. + // If so, the SQL AS clause can be used to provide names for computed columns or to provide alias names for columns + // as required. + // + // Returns: + // - false: if column aliasing is unsupported; + // - true: if column aliasing is supported. + SqlInfo_SQL_SUPPORTS_COLUMN_ALIASING SqlInfo = 515 + // + // Retrieves a boolean value indicating whether concatenations between null and non-null values being + // null are supported. + // + // - Returns: + // - false: if concatenations between null and non-null values being null are unsupported; + // - true: if concatenations between null and non-null values being null are supported. + SqlInfo_SQL_NULL_PLUS_NULL_IS_NULL SqlInfo = 516 + // + // Retrieves a map where the key is the type to convert from and the value is a list with the types to convert to, + // indicating the supported conversions. Each key and each item on the list value is a value to a predefined type on + // SqlSupportsConvert enum. + // The returned map will be: map<int32, list<int32>> + SqlInfo_SQL_SUPPORTS_CONVERT SqlInfo = 517 + // + // Retrieves a boolean value indicating whether, when table correlation names are supported, + // they are restricted to being different from the names of the tables. + // + // Returns: + // - false: if table correlation names are unsupported; + // - true: if table correlation names are supported. + SqlInfo_SQL_SUPPORTS_TABLE_CORRELATION_NAMES SqlInfo = 518 + // + // Retrieves a boolean value indicating whether, when table correlation names are supported, + // they are restricted to being different from the names of the tables.
+ // + // Returns: + // - false: if different table correlation names are unsupported; + // - true: if different table correlation names are supported + SqlInfo_SQL_SUPPORTS_DIFFERENT_TABLE_CORRELATION_NAMES SqlInfo = 519 + // + // Retrieves a boolean value indicating whether expressions in ORDER BY lists are supported. + // + // Returns: + // - false: if expressions in ORDER BY are unsupported; + // - true: if expressions in ORDER BY are supported; + SqlInfo_SQL_SUPPORTS_EXPRESSIONS_IN_ORDER_BY SqlInfo = 520 + // + // Retrieves a boolean value indicating whether using a column that is not in the SELECT statement in a GROUP BY + // clause is supported. + // + // Returns: + // - false: if using a column that is not in the SELECT statement in a GROUP BY clause is unsupported; + // - true: if using a column that is not in the SELECT statement in a GROUP BY clause is supported. + SqlInfo_SQL_SUPPORTS_ORDER_BY_UNRELATED SqlInfo = 521 + // + // Retrieves the supported GROUP BY commands; + // + // Returns an int32 bitmask value representing the supported commands. + // The returned bitmask should be parsed in order to retrieve the supported commands. + // + // For instance: + // - return 0 (\b0) => [] (GROUP BY is unsupported); + // - return 1 (\b1) => [SQL_GROUP_BY_UNRELATED]; + // - return 2 (\b10) => [SQL_GROUP_BY_BEYOND_SELECT]; + // - return 3 (\b11) => [SQL_GROUP_BY_UNRELATED, SQL_GROUP_BY_BEYOND_SELECT]. + // Valid GROUP BY types are described under `arrow.flight.protocol.sql.SqlSupportedGroupBy`. + SqlInfo_SQL_SUPPORTED_GROUP_BY SqlInfo = 522 + // + // Retrieves a boolean value indicating whether specifying a LIKE escape clause is supported. + // + // Returns: + // - false: if specifying a LIKE escape clause is unsupported; + // - true: if specifying a LIKE escape clause is supported. + SqlInfo_SQL_SUPPORTS_LIKE_ESCAPE_CLAUSE SqlInfo = 523 + // + // Retrieves a boolean value indicating whether columns may be defined as non-nullable. + // + // Returns: + // - false: if columns cannot be defined as non-nullable; + // - true: if columns may be defined as non-nullable. + SqlInfo_SQL_SUPPORTS_NON_NULLABLE_COLUMNS SqlInfo = 524 + // + // Retrieves the supported SQL grammar level as per the ODBC specification. + // + // Returns an int32 bitmask value representing the supported SQL grammar level. + // The returned bitmask should be parsed in order to retrieve the supported grammar levels. + // + // For instance: + // - return 0 (\b0) => [] (SQL grammar is unsupported); + // - return 1 (\b1) => [SQL_MINIMUM_GRAMMAR]; + // - return 2 (\b10) => [SQL_CORE_GRAMMAR]; + // - return 3 (\b11) => [SQL_MINIMUM_GRAMMAR, SQL_CORE_GRAMMAR]; + // - return 4 (\b100) => [SQL_EXTENDED_GRAMMAR]; + // - return 5 (\b101) => [SQL_MINIMUM_GRAMMAR, SQL_EXTENDED_GRAMMAR]; + // - return 6 (\b110) => [SQL_CORE_GRAMMAR, SQL_EXTENDED_GRAMMAR]; + // - return 7 (\b111) => [SQL_MINIMUM_GRAMMAR, SQL_CORE_GRAMMAR, SQL_EXTENDED_GRAMMAR]. + // Valid SQL grammar levels are described under `arrow.flight.protocol.sql.SupportedSqlGrammar`. + SqlInfo_SQL_SUPPORTED_GRAMMAR SqlInfo = 525 + // + // Retrieves the supported ANSI92 SQL grammar level. + // + // Returns an int32 bitmask value representing the supported ANSI92 SQL grammar level. + // The returned bitmask should be parsed in order to retrieve the supported commands. 
+ // + // For instance: + // - return 0 (\b0) => [] (ANSI92 SQL grammar is unsupported); + // - return 1 (\b1) => [ANSI92_ENTRY_SQL]; + // - return 2 (\b10) => [ANSI92_INTERMEDIATE_SQL]; + // - return 3 (\b11) => [ANSI92_ENTRY_SQL, ANSI92_INTERMEDIATE_SQL]; + // - return 4 (\b100) => [ANSI92_FULL_SQL]; + // - return 5 (\b101) => [ANSI92_ENTRY_SQL, ANSI92_FULL_SQL]; + // - return 6 (\b110) => [ANSI92_INTERMEDIATE_SQL, ANSI92_FULL_SQL]; + // - return 7 (\b111) => [ANSI92_ENTRY_SQL, ANSI92_INTERMEDIATE_SQL, ANSI92_FULL_SQL]. + // Valid ANSI92 SQL grammar levels are described under `arrow.flight.protocol.sql.SupportedAnsi92SqlGrammarLevel`. + SqlInfo_SQL_ANSI92_SUPPORTED_LEVEL SqlInfo = 526 + // + // Retrieves a boolean value indicating whether the SQL Integrity Enhancement Facility is supported. + // + // Returns: + // - false: if the SQL Integrity Enhancement Facility is supported; + // - true: if the SQL Integrity Enhancement Facility is supported. + SqlInfo_SQL_SUPPORTS_INTEGRITY_ENHANCEMENT_FACILITY SqlInfo = 527 + // + // Retrieves the support level for SQL OUTER JOINs. + // + // Returns a int32 ordinal for the SQL ordering being used, as described in + // `arrow.flight.protocol.sql.SqlOuterJoinsSupportLevel`. + SqlInfo_SQL_OUTER_JOINS_SUPPORT_LEVEL SqlInfo = 528 + // Retrieves a UTF-8 string with the preferred term for "schema". + SqlInfo_SQL_SCHEMA_TERM SqlInfo = 529 + // Retrieves a UTF-8 string with the preferred term for "procedure". + SqlInfo_SQL_PROCEDURE_TERM SqlInfo = 530 + // + // Retrieves a UTF-8 string with the preferred term for "catalog". + // If a empty string is returned its assumed that the server does NOT supports catalogs. + SqlInfo_SQL_CATALOG_TERM SqlInfo = 531 + // + // Retrieves a boolean value indicating whether a catalog appears at the start of a fully qualified table name. + // + // - false: if a catalog does not appear at the start of a fully qualified table name; + // - true: if a catalog appears at the start of a fully qualified table name. + SqlInfo_SQL_CATALOG_AT_START SqlInfo = 532 + // + // Retrieves the supported actions for a SQL schema. + // + // Returns an int32 bitmask value representing the supported actions for a SQL schema. + // The returned bitmask should be parsed in order to retrieve the supported actions for a SQL schema. + // + // For instance: + // - return 0 (\b0) => [] (no supported actions for SQL schema); + // - return 1 (\b1) => [SQL_ELEMENT_IN_PROCEDURE_CALLS]; + // - return 2 (\b10) => [SQL_ELEMENT_IN_INDEX_DEFINITIONS]; + // - return 3 (\b11) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS]; + // - return 4 (\b100) => [SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]; + // - return 5 (\b101) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]; + // - return 6 (\b110) => [SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]; + // - return 7 (\b111) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]. + // Valid actions for a SQL schema described under `arrow.flight.protocol.sql.SqlSupportedElementActions`. + SqlInfo_SQL_SCHEMAS_SUPPORTED_ACTIONS SqlInfo = 533 + // + // Retrieves the supported actions for a SQL schema. + // + // Returns an int32 bitmask value representing the supported actions for a SQL catalog. + // The returned bitmask should be parsed in order to retrieve the supported actions for a SQL catalog. 
+ // + // For instance: + // - return 0 (\b0) => [] (no supported actions for SQL catalog); + // - return 1 (\b1) => [SQL_ELEMENT_IN_PROCEDURE_CALLS]; + // - return 2 (\b10) => [SQL_ELEMENT_IN_INDEX_DEFINITIONS]; + // - return 3 (\b11) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS]; + // - return 4 (\b100) => [SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]; + // - return 5 (\b101) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]; + // - return 6 (\b110) => [SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]; + // - return 7 (\b111) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]. + // Valid actions for a SQL catalog are described under `arrow.flight.protocol.sql.SqlSupportedElementActions`. + SqlInfo_SQL_CATALOGS_SUPPORTED_ACTIONS SqlInfo = 534 + // + // Retrieves the supported SQL positioned commands. + // + // Returns an int32 bitmask value representing the supported SQL positioned commands. + // The returned bitmask should be parsed in order to retrieve the supported SQL positioned commands. + // + // For instance: + // - return 0 (\b0) => [] (no supported SQL positioned commands); + // - return 1 (\b1) => [SQL_POSITIONED_DELETE]; + // - return 2 (\b10) => [SQL_POSITIONED_UPDATE]; + // - return 3 (\b11) => [SQL_POSITIONED_DELETE, SQL_POSITIONED_UPDATE]. + // Valid SQL positioned commands are described under `arrow.flight.protocol.sql.SqlSupportedPositionedCommands`. + SqlInfo_SQL_SUPPORTED_POSITIONED_COMMANDS SqlInfo = 535 + // + // Retrieves a boolean value indicating whether SELECT FOR UPDATE statements are supported. + // + // Returns: + // - false: if SELECT FOR UPDATE statements are unsupported; + // - true: if SELECT FOR UPDATE statements are supported. + SqlInfo_SQL_SELECT_FOR_UPDATE_SUPPORTED SqlInfo = 536 + // + // Retrieves a boolean value indicating whether stored procedure calls that use the stored procedure escape syntax + // are supported. + // + // Returns: + // - false: if stored procedure calls that use the stored procedure escape syntax are unsupported; + // - true: if stored procedure calls that use the stored procedure escape syntax are supported. + SqlInfo_SQL_STORED_PROCEDURES_SUPPORTED SqlInfo = 537 + // + // Retrieves the supported SQL subqueries. + // + // Returns an int32 bitmask value representing the supported SQL subqueries. + // The returned bitmask should be parsed in order to retrieve the supported SQL subqueries. 
+ // + // For instance: + // - return 0 (\b0) => [] (no supported SQL subqueries); + // - return 1 (\b1) => [SQL_SUBQUERIES_IN_COMPARISONS]; + // - return 2 (\b10) => [SQL_SUBQUERIES_IN_EXISTS]; + // - return 3 (\b11) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_EXISTS]; + // - return 4 (\b100) => [SQL_SUBQUERIES_IN_INS]; + // - return 5 (\b101) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_INS]; + // - return 6 (\b110) => [SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_EXISTS]; + // - return 7 (\b111) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_INS]; + // - return 8 (\b1000) => [SQL_SUBQUERIES_IN_QUANTIFIEDS]; + // - return 9 (\b1001) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + // - return 10 (\b1010) => [SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + // - return 11 (\b1011) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + // - return 12 (\b1100) => [SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + // - return 13 (\b1101) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + // - return 14 (\b1110) => [SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + // - return 15 (\b1111) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + // - ... + // Valid SQL subqueries are described under `arrow.flight.protocol.sql.SqlSupportedSubqueries`. + SqlInfo_SQL_SUPPORTED_SUBQUERIES SqlInfo = 538 + // + // Retrieves a boolean value indicating whether correlated subqueries are supported. + // + // Returns: + // - false: if correlated subqueries are unsupported; + // - true: if correlated subqueries are supported. + SqlInfo_SQL_CORRELATED_SUBQUERIES_SUPPORTED SqlInfo = 539 + // + // Retrieves the supported SQL UNIONs. + // + // Returns an int32 bitmask value representing the supported SQL UNIONs. + // The returned bitmask should be parsed in order to retrieve the supported SQL UNIONs. + // + // For instance: + // - return 0 (\b0) => [] (no supported SQL positioned commands); + // - return 1 (\b1) => [SQL_UNION]; + // - return 2 (\b10) => [SQL_UNION_ALL]; + // - return 3 (\b11) => [SQL_UNION, SQL_UNION_ALL]. + // Valid SQL positioned commands are described under `arrow.flight.protocol.sql.SqlSupportedUnions`. + SqlInfo_SQL_SUPPORTED_UNIONS SqlInfo = 540 + // Retrieves a int64 value representing the maximum number of hex characters allowed in an inline binary literal. + SqlInfo_SQL_MAX_BINARY_LITERAL_LENGTH SqlInfo = 541 + // Retrieves a int64 value representing the maximum number of characters allowed for a character literal. + SqlInfo_SQL_MAX_CHAR_LITERAL_LENGTH SqlInfo = 542 + // Retrieves a int64 value representing the maximum number of characters allowed for a column name. + SqlInfo_SQL_MAX_COLUMN_NAME_LENGTH SqlInfo = 543 + // Retrieves a int64 value representing the the maximum number of columns allowed in a GROUP BY clause. + SqlInfo_SQL_MAX_COLUMNS_IN_GROUP_BY SqlInfo = 544 + // Retrieves a int64 value representing the maximum number of columns allowed in an index. + SqlInfo_SQL_MAX_COLUMNS_IN_INDEX SqlInfo = 545 + // Retrieves a int64 value representing the maximum number of columns allowed in an ORDER BY clause. + SqlInfo_SQL_MAX_COLUMNS_IN_ORDER_BY SqlInfo = 546 + // Retrieves a int64 value representing the maximum number of columns allowed in a SELECT list. 
+ SqlInfo_SQL_MAX_COLUMNS_IN_SELECT SqlInfo = 547 + // Retrieves a int64 value representing the maximum number of columns allowed in a table. + SqlInfo_SQL_MAX_COLUMNS_IN_TABLE SqlInfo = 548 + // Retrieves a int64 value representing the maximum number of concurrent connections possible. + SqlInfo_SQL_MAX_CONNECTIONS SqlInfo = 549 + // Retrieves a int64 value the maximum number of characters allowed in a cursor name. + SqlInfo_SQL_MAX_CURSOR_NAME_LENGTH SqlInfo = 550 + // + // Retrieves a int64 value representing the maximum number of bytes allowed for an index, + // including all of the parts of the index. + SqlInfo_SQL_MAX_INDEX_LENGTH SqlInfo = 551 + // Retrieves a int64 value representing the maximum number of characters allowed in a schema name. + SqlInfo_SQL_DB_SCHEMA_NAME_LENGTH SqlInfo = 552 + // Retrieves a int64 value representing the maximum number of characters allowed in a procedure name. + SqlInfo_SQL_MAX_PROCEDURE_NAME_LENGTH SqlInfo = 553 + // Retrieves a int64 value representing the maximum number of characters allowed in a catalog name. + SqlInfo_SQL_MAX_CATALOG_NAME_LENGTH SqlInfo = 554 + // Retrieves a int64 value representing the maximum number of bytes allowed in a single row. + SqlInfo_SQL_MAX_ROW_SIZE SqlInfo = 555 + // + // Retrieves a boolean indicating whether the return value for the JDBC method getMaxRowSize includes the SQL + // data types LONGVARCHAR and LONGVARBINARY. + // + // Returns: + // - false: if return value for the JDBC method getMaxRowSize does + // not include the SQL data types LONGVARCHAR and LONGVARBINARY; + // - true: if return value for the JDBC method getMaxRowSize includes + // the SQL data types LONGVARCHAR and LONGVARBINARY. + SqlInfo_SQL_MAX_ROW_SIZE_INCLUDES_BLOBS SqlInfo = 556 + // + // Retrieves a int64 value representing the maximum number of characters allowed for an SQL statement; + // a result of 0 (zero) means that there is no limit or the limit is not known. + SqlInfo_SQL_MAX_STATEMENT_LENGTH SqlInfo = 557 + // Retrieves a int64 value representing the maximum number of active statements that can be open at the same time. + SqlInfo_SQL_MAX_STATEMENTS SqlInfo = 558 + // Retrieves a int64 value representing the maximum number of characters allowed in a table name. + SqlInfo_SQL_MAX_TABLE_NAME_LENGTH SqlInfo = 559 + // Retrieves a int64 value representing the maximum number of tables allowed in a SELECT statement. + SqlInfo_SQL_MAX_TABLES_IN_SELECT SqlInfo = 560 + // Retrieves a int64 value representing the maximum number of characters allowed in a user name. + SqlInfo_SQL_MAX_USERNAME_LENGTH SqlInfo = 561 + // + // Retrieves this database's default transaction isolation level as described in + // `arrow.flight.protocol.sql.SqlTransactionIsolationLevel`. + // + // Returns a int32 ordinal for the SQL transaction isolation level. + SqlInfo_SQL_DEFAULT_TRANSACTION_ISOLATION SqlInfo = 562 + // + // Retrieves a boolean value indicating whether transactions are supported. If not, invoking the method commit is a + // noop, and the isolation level is `arrow.flight.protocol.sql.SqlTransactionIsolationLevel.TRANSACTION_NONE`. + // + // Returns: + // - false: if transactions are unsupported; + // - true: if transactions are supported. + SqlInfo_SQL_TRANSACTIONS_SUPPORTED SqlInfo = 563 + // + // Retrieves the supported transactions isolation levels. + // + // Returns an int32 bitmask value representing the supported transactions isolation levels. 
+ // The returned bitmask should be parsed in order to retrieve the supported transactions isolation levels. + // + // For instance: + // - return 0 (\b0) => [] (no supported SQL transactions isolation levels); + // - return 1 (\b1) => [SQL_TRANSACTION_NONE]; + // - return 2 (\b10) => [SQL_TRANSACTION_READ_UNCOMMITTED]; + // - return 3 (\b11) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_UNCOMMITTED]; + // - return 4 (\b100) => [SQL_TRANSACTION_REPEATABLE_READ]; + // - return 5 (\b101) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_REPEATABLE_READ]; + // - return 6 (\b110) => [SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ]; + // - return 7 (\b111) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ]; + // - return 8 (\b1000) => [SQL_TRANSACTION_REPEATABLE_READ]; + // - return 9 (\b1001) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_REPEATABLE_READ]; + // - return 10 (\b1010) => [SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ]; + // - return 11 (\b1011) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ]; + // - return 12 (\b1100) => [SQL_TRANSACTION_REPEATABLE_READ, SQL_TRANSACTION_REPEATABLE_READ]; + // - return 13 (\b1101) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_REPEATABLE_READ, SQL_TRANSACTION_REPEATABLE_READ]; + // - return 14 (\b1110) => [SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ, SQL_TRANSACTION_REPEATABLE_READ]; + // - return 15 (\b1111) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ, SQL_TRANSACTION_REPEATABLE_READ]; + // - return 16 (\b10000) => [SQL_TRANSACTION_SERIALIZABLE]; + // - ... + // Valid SQL positioned commands are described under `arrow.flight.protocol.sql.SqlTransactionIsolationLevel`. + SqlInfo_SQL_SUPPORTED_TRANSACTIONS_ISOLATION_LEVELS SqlInfo = 564 + // + // Retrieves a boolean value indicating whether a data definition statement within a transaction forces + // the transaction to commit. + // + // Returns: + // - false: if a data definition statement within a transaction does not force the transaction to commit; + // - true: if a data definition statement within a transaction forces the transaction to commit. + SqlInfo_SQL_DATA_DEFINITION_CAUSES_TRANSACTION_COMMIT SqlInfo = 565 + // + // Retrieves a boolean value indicating whether a data definition statement within a transaction is ignored. + // + // Returns: + // - false: if a data definition statement within a transaction is taken into account; + // - true: a data definition statement within a transaction is ignored. + SqlInfo_SQL_DATA_DEFINITIONS_IN_TRANSACTIONS_IGNORED SqlInfo = 566 + // + // Retrieves an int32 bitmask value representing the supported result set types. + // The returned bitmask should be parsed in order to retrieve the supported result set types. 
+ // + // For instance: + // - return 0 (\b0) => [] (no supported result set types); + // - return 1 (\b1) => [SQL_RESULT_SET_TYPE_UNSPECIFIED]; + // - return 2 (\b10) => [SQL_RESULT_SET_TYPE_FORWARD_ONLY]; + // - return 3 (\b11) => [SQL_RESULT_SET_TYPE_UNSPECIFIED, SQL_RESULT_SET_TYPE_FORWARD_ONLY]; + // - return 4 (\b100) => [SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE]; + // - return 5 (\b101) => [SQL_RESULT_SET_TYPE_UNSPECIFIED, SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE]; + // - return 6 (\b110) => [SQL_RESULT_SET_TYPE_FORWARD_ONLY, SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE]; + // - return 7 (\b111) => [SQL_RESULT_SET_TYPE_UNSPECIFIED, SQL_RESULT_SET_TYPE_FORWARD_ONLY, SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE]; + // - return 8 (\b1000) => [SQL_RESULT_SET_TYPE_SCROLL_SENSITIVE]; + // - ... + // Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetType`. + SqlInfo_SQL_SUPPORTED_RESULT_SET_TYPES SqlInfo = 567 + // + // Returns an int32 bitmask value concurrency types supported for + // `arrow.flight.protocol.sql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_UNSPECIFIED`. + // + // For instance: + // - return 0 (\b0) => [] (no supported concurrency types for this result set type) + // - return 1 (\b1) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED] + // - return 2 (\b10) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY] + // - return 3 (\b11) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY] + // - return 4 (\b100) => [SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 5 (\b101) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 6 (\b110) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 7 (\b111) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency`. + SqlInfo_SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_UNSPECIFIED SqlInfo = 568 + // + // Returns an int32 bitmask value concurrency types supported for + // `arrow.flight.protocol.sql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_FORWARD_ONLY`. + // + // For instance: + // - return 0 (\b0) => [] (no supported concurrency types for this result set type) + // - return 1 (\b1) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED] + // - return 2 (\b10) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY] + // - return 3 (\b11) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY] + // - return 4 (\b100) => [SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 5 (\b101) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 6 (\b110) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 7 (\b111) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency`. + SqlInfo_SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_FORWARD_ONLY SqlInfo = 569 + // + // Returns an int32 bitmask value concurrency types supported for + // `arrow.flight.protocol.sql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_SCROLL_SENSITIVE`. 
+ // + // For instance: + // - return 0 (\b0) => [] (no supported concurrency types for this result set type) + // - return 1 (\b1) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED] + // - return 2 (\b10) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY] + // - return 3 (\b11) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY] + // - return 4 (\b100) => [SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 5 (\b101) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 6 (\b110) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 7 (\b111) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency`. + SqlInfo_SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_SCROLL_SENSITIVE SqlInfo = 570 + // + // Returns an int32 bitmask value concurrency types supported for + // `arrow.flight.protocol.sql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE`. + // + // For instance: + // - return 0 (\b0) => [] (no supported concurrency types for this result set type) + // - return 1 (\b1) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED] + // - return 2 (\b10) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY] + // - return 3 (\b11) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY] + // - return 4 (\b100) => [SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 5 (\b101) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 6 (\b110) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // - return 7 (\b111) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + // Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency`. + SqlInfo_SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_SCROLL_INSENSITIVE SqlInfo = 571 + // + // Retrieves a boolean value indicating whether this database supports batch updates. + // + // - false: if this database does not support batch updates; + // - true: if this database supports batch updates. + SqlInfo_SQL_BATCH_UPDATES_SUPPORTED SqlInfo = 572 + // + // Retrieves a boolean value indicating whether this database supports savepoints. + // + // Returns: + // - false: if this database does not support savepoints; + // - true: if this database supports savepoints. + SqlInfo_SQL_SAVEPOINTS_SUPPORTED SqlInfo = 573 + // + // Retrieves a boolean value indicating whether named parameters are supported in callable statements. + // + // Returns: + // - false: if named parameters in callable statements are unsupported; + // - true: if named parameters in callable statements are supported. + SqlInfo_SQL_NAMED_PARAMETERS_SUPPORTED SqlInfo = 574 + // + // Retrieves a boolean value indicating whether updates made to a LOB are made on a copy or directly to the LOB. + // + // Returns: + // - false: if updates made to a LOB are made directly to the LOB; + // - true: if updates made to a LOB are made on a copy. + SqlInfo_SQL_LOCATORS_UPDATE_COPY SqlInfo = 575 + // + // Retrieves a boolean value indicating whether invoking user-defined or vendor functions + // using the stored procedure escape syntax is supported. 
+ // + // Returns: + // - false: if invoking user-defined or vendor functions using the stored procedure escape syntax is unsupported; + // - true: if invoking user-defined or vendor functions using the stored procedure escape syntax is supported. + SqlInfo_SQL_STORED_FUNCTIONS_USING_CALL_SYNTAX_SUPPORTED SqlInfo = 576 +) + +// Enum value maps for SqlInfo. +var ( + SqlInfo_name = map[int32]string{ + 0: "FLIGHT_SQL_SERVER_NAME", + 1: "FLIGHT_SQL_SERVER_VERSION", + 2: "FLIGHT_SQL_SERVER_ARROW_VERSION", + 3: "FLIGHT_SQL_SERVER_READ_ONLY", + 500: "SQL_DDL_CATALOG", + 501: "SQL_DDL_SCHEMA", + 502: "SQL_DDL_TABLE", + 503: "SQL_IDENTIFIER_CASE", + 504: "SQL_IDENTIFIER_QUOTE_CHAR", + 505: "SQL_QUOTED_IDENTIFIER_CASE", + 506: "SQL_ALL_TABLES_ARE_SELECTABLE", + 507: "SQL_NULL_ORDERING", + 508: "SQL_KEYWORDS", + 509: "SQL_NUMERIC_FUNCTIONS", + 510: "SQL_STRING_FUNCTIONS", + 511: "SQL_SYSTEM_FUNCTIONS", + 512: "SQL_DATETIME_FUNCTIONS", + 513: "SQL_SEARCH_STRING_ESCAPE", + 514: "SQL_EXTRA_NAME_CHARACTERS", + 515: "SQL_SUPPORTS_COLUMN_ALIASING", + 516: "SQL_NULL_PLUS_NULL_IS_NULL", + 517: "SQL_SUPPORTS_CONVERT", + 518: "SQL_SUPPORTS_TABLE_CORRELATION_NAMES", + 519: "SQL_SUPPORTS_DIFFERENT_TABLE_CORRELATION_NAMES", + 520: "SQL_SUPPORTS_EXPRESSIONS_IN_ORDER_BY", + 521: "SQL_SUPPORTS_ORDER_BY_UNRELATED", + 522: "SQL_SUPPORTED_GROUP_BY", + 523: "SQL_SUPPORTS_LIKE_ESCAPE_CLAUSE", + 524: "SQL_SUPPORTS_NON_NULLABLE_COLUMNS", + 525: "SQL_SUPPORTED_GRAMMAR", + 526: "SQL_ANSI92_SUPPORTED_LEVEL", + 527: "SQL_SUPPORTS_INTEGRITY_ENHANCEMENT_FACILITY", + 528: "SQL_OUTER_JOINS_SUPPORT_LEVEL", + 529: "SQL_SCHEMA_TERM", + 530: "SQL_PROCEDURE_TERM", + 531: "SQL_CATALOG_TERM", + 532: "SQL_CATALOG_AT_START", + 533: "SQL_SCHEMAS_SUPPORTED_ACTIONS", + 534: "SQL_CATALOGS_SUPPORTED_ACTIONS", + 535: "SQL_SUPPORTED_POSITIONED_COMMANDS", + 536: "SQL_SELECT_FOR_UPDATE_SUPPORTED", + 537: "SQL_STORED_PROCEDURES_SUPPORTED", + 538: "SQL_SUPPORTED_SUBQUERIES", + 539: "SQL_CORRELATED_SUBQUERIES_SUPPORTED", + 540: "SQL_SUPPORTED_UNIONS", + 541: "SQL_MAX_BINARY_LITERAL_LENGTH", + 542: "SQL_MAX_CHAR_LITERAL_LENGTH", + 543: "SQL_MAX_COLUMN_NAME_LENGTH", + 544: "SQL_MAX_COLUMNS_IN_GROUP_BY", + 545: "SQL_MAX_COLUMNS_IN_INDEX", + 546: "SQL_MAX_COLUMNS_IN_ORDER_BY", + 547: "SQL_MAX_COLUMNS_IN_SELECT", + 548: "SQL_MAX_COLUMNS_IN_TABLE", + 549: "SQL_MAX_CONNECTIONS", + 550: "SQL_MAX_CURSOR_NAME_LENGTH", + 551: "SQL_MAX_INDEX_LENGTH", + 552: "SQL_DB_SCHEMA_NAME_LENGTH", + 553: "SQL_MAX_PROCEDURE_NAME_LENGTH", + 554: "SQL_MAX_CATALOG_NAME_LENGTH", + 555: "SQL_MAX_ROW_SIZE", + 556: "SQL_MAX_ROW_SIZE_INCLUDES_BLOBS", + 557: "SQL_MAX_STATEMENT_LENGTH", + 558: "SQL_MAX_STATEMENTS", + 559: "SQL_MAX_TABLE_NAME_LENGTH", + 560: "SQL_MAX_TABLES_IN_SELECT", + 561: "SQL_MAX_USERNAME_LENGTH", + 562: "SQL_DEFAULT_TRANSACTION_ISOLATION", + 563: "SQL_TRANSACTIONS_SUPPORTED", + 564: "SQL_SUPPORTED_TRANSACTIONS_ISOLATION_LEVELS", + 565: "SQL_DATA_DEFINITION_CAUSES_TRANSACTION_COMMIT", + 566: "SQL_DATA_DEFINITIONS_IN_TRANSACTIONS_IGNORED", + 567: "SQL_SUPPORTED_RESULT_SET_TYPES", + 568: "SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_UNSPECIFIED", + 569: "SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_FORWARD_ONLY", + 570: "SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_SCROLL_SENSITIVE", + 571: "SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_SCROLL_INSENSITIVE", + 572: "SQL_BATCH_UPDATES_SUPPORTED", + 573: "SQL_SAVEPOINTS_SUPPORTED", + 574: "SQL_NAMED_PARAMETERS_SUPPORTED", + 575: "SQL_LOCATORS_UPDATE_COPY", + 576: "SQL_STORED_FUNCTIONS_USING_CALL_SYNTAX_SUPPORTED", + } + 
SqlInfo_value = map[string]int32{ + "FLIGHT_SQL_SERVER_NAME": 0, + "FLIGHT_SQL_SERVER_VERSION": 1, + "FLIGHT_SQL_SERVER_ARROW_VERSION": 2, + "FLIGHT_SQL_SERVER_READ_ONLY": 3, + "SQL_DDL_CATALOG": 500, + "SQL_DDL_SCHEMA": 501, + "SQL_DDL_TABLE": 502, + "SQL_IDENTIFIER_CASE": 503, + "SQL_IDENTIFIER_QUOTE_CHAR": 504, + "SQL_QUOTED_IDENTIFIER_CASE": 505, + "SQL_ALL_TABLES_ARE_SELECTABLE": 506, + "SQL_NULL_ORDERING": 507, + "SQL_KEYWORDS": 508, + "SQL_NUMERIC_FUNCTIONS": 509, + "SQL_STRING_FUNCTIONS": 510, + "SQL_SYSTEM_FUNCTIONS": 511, + "SQL_DATETIME_FUNCTIONS": 512, + "SQL_SEARCH_STRING_ESCAPE": 513, + "SQL_EXTRA_NAME_CHARACTERS": 514, + "SQL_SUPPORTS_COLUMN_ALIASING": 515, + "SQL_NULL_PLUS_NULL_IS_NULL": 516, + "SQL_SUPPORTS_CONVERT": 517, + "SQL_SUPPORTS_TABLE_CORRELATION_NAMES": 518, + "SQL_SUPPORTS_DIFFERENT_TABLE_CORRELATION_NAMES": 519, + "SQL_SUPPORTS_EXPRESSIONS_IN_ORDER_BY": 520, + "SQL_SUPPORTS_ORDER_BY_UNRELATED": 521, + "SQL_SUPPORTED_GROUP_BY": 522, + "SQL_SUPPORTS_LIKE_ESCAPE_CLAUSE": 523, + "SQL_SUPPORTS_NON_NULLABLE_COLUMNS": 524, + "SQL_SUPPORTED_GRAMMAR": 525, + "SQL_ANSI92_SUPPORTED_LEVEL": 526, + "SQL_SUPPORTS_INTEGRITY_ENHANCEMENT_FACILITY": 527, + "SQL_OUTER_JOINS_SUPPORT_LEVEL": 528, + "SQL_SCHEMA_TERM": 529, + "SQL_PROCEDURE_TERM": 530, + "SQL_CATALOG_TERM": 531, + "SQL_CATALOG_AT_START": 532, + "SQL_SCHEMAS_SUPPORTED_ACTIONS": 533, + "SQL_CATALOGS_SUPPORTED_ACTIONS": 534, + "SQL_SUPPORTED_POSITIONED_COMMANDS": 535, + "SQL_SELECT_FOR_UPDATE_SUPPORTED": 536, + "SQL_STORED_PROCEDURES_SUPPORTED": 537, + "SQL_SUPPORTED_SUBQUERIES": 538, + "SQL_CORRELATED_SUBQUERIES_SUPPORTED": 539, + "SQL_SUPPORTED_UNIONS": 540, + "SQL_MAX_BINARY_LITERAL_LENGTH": 541, + "SQL_MAX_CHAR_LITERAL_LENGTH": 542, + "SQL_MAX_COLUMN_NAME_LENGTH": 543, + "SQL_MAX_COLUMNS_IN_GROUP_BY": 544, + "SQL_MAX_COLUMNS_IN_INDEX": 545, + "SQL_MAX_COLUMNS_IN_ORDER_BY": 546, + "SQL_MAX_COLUMNS_IN_SELECT": 547, + "SQL_MAX_COLUMNS_IN_TABLE": 548, + "SQL_MAX_CONNECTIONS": 549, + "SQL_MAX_CURSOR_NAME_LENGTH": 550, + "SQL_MAX_INDEX_LENGTH": 551, + "SQL_DB_SCHEMA_NAME_LENGTH": 552, + "SQL_MAX_PROCEDURE_NAME_LENGTH": 553, + "SQL_MAX_CATALOG_NAME_LENGTH": 554, + "SQL_MAX_ROW_SIZE": 555, + "SQL_MAX_ROW_SIZE_INCLUDES_BLOBS": 556, + "SQL_MAX_STATEMENT_LENGTH": 557, + "SQL_MAX_STATEMENTS": 558, + "SQL_MAX_TABLE_NAME_LENGTH": 559, + "SQL_MAX_TABLES_IN_SELECT": 560, + "SQL_MAX_USERNAME_LENGTH": 561, + "SQL_DEFAULT_TRANSACTION_ISOLATION": 562, + "SQL_TRANSACTIONS_SUPPORTED": 563, + "SQL_SUPPORTED_TRANSACTIONS_ISOLATION_LEVELS": 564, + "SQL_DATA_DEFINITION_CAUSES_TRANSACTION_COMMIT": 565, + "SQL_DATA_DEFINITIONS_IN_TRANSACTIONS_IGNORED": 566, + "SQL_SUPPORTED_RESULT_SET_TYPES": 567, + "SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_UNSPECIFIED": 568, + "SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_FORWARD_ONLY": 569, + "SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_SCROLL_SENSITIVE": 570, + "SQL_SUPPORTED_CONCURRENCIES_FOR_RESULT_SET_SCROLL_INSENSITIVE": 571, + "SQL_BATCH_UPDATES_SUPPORTED": 572, + "SQL_SAVEPOINTS_SUPPORTED": 573, + "SQL_NAMED_PARAMETERS_SUPPORTED": 574, + "SQL_LOCATORS_UPDATE_COPY": 575, + "SQL_STORED_FUNCTIONS_USING_CALL_SYNTAX_SUPPORTED": 576, + } +) + +func (x SqlInfo) Enum() *SqlInfo { + p := new(SqlInfo) + *p = x + return p +} + +func (x SqlInfo) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (SqlInfo) Descriptor() protoreflect.EnumDescriptor { + return file_FlightSql_proto_enumTypes[0].Descriptor() +} + +func (SqlInfo) Type() protoreflect.EnumType { + 
return &file_FlightSql_proto_enumTypes[0] +} + +func (x SqlInfo) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use SqlInfo.Descriptor instead. +func (SqlInfo) EnumDescriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{0} +} + +type SqlSupportedCaseSensitivity int32 + +const ( + SqlSupportedCaseSensitivity_SQL_CASE_SENSITIVITY_UNKNOWN SqlSupportedCaseSensitivity = 0 + SqlSupportedCaseSensitivity_SQL_CASE_SENSITIVITY_CASE_INSENSITIVE SqlSupportedCaseSensitivity = 1 + SqlSupportedCaseSensitivity_SQL_CASE_SENSITIVITY_UPPERCASE SqlSupportedCaseSensitivity = 2 + SqlSupportedCaseSensitivity_SQL_CASE_SENSITIVITY_LOWERCASE SqlSupportedCaseSensitivity = 3 +) + +// Enum value maps for SqlSupportedCaseSensitivity. +var ( + SqlSupportedCaseSensitivity_name = map[int32]string{ + 0: "SQL_CASE_SENSITIVITY_UNKNOWN", + 1: "SQL_CASE_SENSITIVITY_CASE_INSENSITIVE", + 2: "SQL_CASE_SENSITIVITY_UPPERCASE", + 3: "SQL_CASE_SENSITIVITY_LOWERCASE", + } + SqlSupportedCaseSensitivity_value = map[string]int32{ + "SQL_CASE_SENSITIVITY_UNKNOWN": 0, + "SQL_CASE_SENSITIVITY_CASE_INSENSITIVE": 1, + "SQL_CASE_SENSITIVITY_UPPERCASE": 2, + "SQL_CASE_SENSITIVITY_LOWERCASE": 3, + } +) + +func (x SqlSupportedCaseSensitivity) Enum() *SqlSupportedCaseSensitivity { + p := new(SqlSupportedCaseSensitivity) + *p = x + return p +} + +func (x SqlSupportedCaseSensitivity) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (SqlSupportedCaseSensitivity) Descriptor() protoreflect.EnumDescriptor { + return file_FlightSql_proto_enumTypes[1].Descriptor() +} + +func (SqlSupportedCaseSensitivity) Type() protoreflect.EnumType { + return &file_FlightSql_proto_enumTypes[1] +} + +func (x SqlSupportedCaseSensitivity) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use SqlSupportedCaseSensitivity.Descriptor instead. +func (SqlSupportedCaseSensitivity) EnumDescriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{1} +} + +type SqlNullOrdering int32 + +const ( + SqlNullOrdering_SQL_NULLS_SORTED_HIGH SqlNullOrdering = 0 + SqlNullOrdering_SQL_NULLS_SORTED_LOW SqlNullOrdering = 1 + SqlNullOrdering_SQL_NULLS_SORTED_AT_START SqlNullOrdering = 2 + SqlNullOrdering_SQL_NULLS_SORTED_AT_END SqlNullOrdering = 3 +) + +// Enum value maps for SqlNullOrdering. +var ( + SqlNullOrdering_name = map[int32]string{ + 0: "SQL_NULLS_SORTED_HIGH", + 1: "SQL_NULLS_SORTED_LOW", + 2: "SQL_NULLS_SORTED_AT_START", + 3: "SQL_NULLS_SORTED_AT_END", + } + SqlNullOrdering_value = map[string]int32{ + "SQL_NULLS_SORTED_HIGH": 0, + "SQL_NULLS_SORTED_LOW": 1, + "SQL_NULLS_SORTED_AT_START": 2, + "SQL_NULLS_SORTED_AT_END": 3, + } +) + +func (x SqlNullOrdering) Enum() *SqlNullOrdering { + p := new(SqlNullOrdering) + *p = x + return p +} + +func (x SqlNullOrdering) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (SqlNullOrdering) Descriptor() protoreflect.EnumDescriptor { + return file_FlightSql_proto_enumTypes[2].Descriptor() +} + +func (SqlNullOrdering) Type() protoreflect.EnumType { + return &file_FlightSql_proto_enumTypes[2] +} + +func (x SqlNullOrdering) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use SqlNullOrdering.Descriptor instead. 
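// Illustrative sketch, not part of the generated code or of this patch: the
// SqlInfo values SQL_IDENTIFIER_CASE (503), SQL_QUOTED_IDENTIFIER_CASE (505)
// and SQL_NULL_ORDERING (507) are documented above as int32 ordinals whose
// meanings are given by SqlSupportedCaseSensitivity and SqlNullOrdering. A
// client that has read such an ordinal out of a GetSqlInfo result could map
// it back onto the enum roughly as follows (the helper name is made up).
func caseSensitivityFromOrdinal(ordinal int32) (SqlSupportedCaseSensitivity, bool) {
	// SqlSupportedCaseSensitivity_name is the generated number-to-name map;
	// an ordinal missing from it means the server sent an unknown value.
	if _, ok := SqlSupportedCaseSensitivity_name[ordinal]; !ok {
		return SqlSupportedCaseSensitivity_SQL_CASE_SENSITIVITY_UNKNOWN, false
	}
	return SqlSupportedCaseSensitivity(ordinal), true
}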
+func (SqlNullOrdering) EnumDescriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{2} +} + +type SupportedSqlGrammar int32 + +const ( + SupportedSqlGrammar_SQL_MINIMUM_GRAMMAR SupportedSqlGrammar = 0 + SupportedSqlGrammar_SQL_CORE_GRAMMAR SupportedSqlGrammar = 1 + SupportedSqlGrammar_SQL_EXTENDED_GRAMMAR SupportedSqlGrammar = 2 +) + +// Enum value maps for SupportedSqlGrammar. +var ( + SupportedSqlGrammar_name = map[int32]string{ + 0: "SQL_MINIMUM_GRAMMAR", + 1: "SQL_CORE_GRAMMAR", + 2: "SQL_EXTENDED_GRAMMAR", + } + SupportedSqlGrammar_value = map[string]int32{ + "SQL_MINIMUM_GRAMMAR": 0, + "SQL_CORE_GRAMMAR": 1, + "SQL_EXTENDED_GRAMMAR": 2, + } +) + +func (x SupportedSqlGrammar) Enum() *SupportedSqlGrammar { + p := new(SupportedSqlGrammar) + *p = x + return p +} + +func (x SupportedSqlGrammar) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (SupportedSqlGrammar) Descriptor() protoreflect.EnumDescriptor { + return file_FlightSql_proto_enumTypes[3].Descriptor() +} + +func (SupportedSqlGrammar) Type() protoreflect.EnumType { + return &file_FlightSql_proto_enumTypes[3] +} + +func (x SupportedSqlGrammar) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use SupportedSqlGrammar.Descriptor instead. +func (SupportedSqlGrammar) EnumDescriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{3} +} + +type SupportedAnsi92SqlGrammarLevel int32 + +const ( + SupportedAnsi92SqlGrammarLevel_ANSI92_ENTRY_SQL SupportedAnsi92SqlGrammarLevel = 0 + SupportedAnsi92SqlGrammarLevel_ANSI92_INTERMEDIATE_SQL SupportedAnsi92SqlGrammarLevel = 1 + SupportedAnsi92SqlGrammarLevel_ANSI92_FULL_SQL SupportedAnsi92SqlGrammarLevel = 2 +) + +// Enum value maps for SupportedAnsi92SqlGrammarLevel. +var ( + SupportedAnsi92SqlGrammarLevel_name = map[int32]string{ + 0: "ANSI92_ENTRY_SQL", + 1: "ANSI92_INTERMEDIATE_SQL", + 2: "ANSI92_FULL_SQL", + } + SupportedAnsi92SqlGrammarLevel_value = map[string]int32{ + "ANSI92_ENTRY_SQL": 0, + "ANSI92_INTERMEDIATE_SQL": 1, + "ANSI92_FULL_SQL": 2, + } +) + +func (x SupportedAnsi92SqlGrammarLevel) Enum() *SupportedAnsi92SqlGrammarLevel { + p := new(SupportedAnsi92SqlGrammarLevel) + *p = x + return p +} + +func (x SupportedAnsi92SqlGrammarLevel) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (SupportedAnsi92SqlGrammarLevel) Descriptor() protoreflect.EnumDescriptor { + return file_FlightSql_proto_enumTypes[4].Descriptor() +} + +func (SupportedAnsi92SqlGrammarLevel) Type() protoreflect.EnumType { + return &file_FlightSql_proto_enumTypes[4] +} + +func (x SupportedAnsi92SqlGrammarLevel) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use SupportedAnsi92SqlGrammarLevel.Descriptor instead. +func (SupportedAnsi92SqlGrammarLevel) EnumDescriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{4} +} + +type SqlOuterJoinsSupportLevel int32 + +const ( + SqlOuterJoinsSupportLevel_SQL_JOINS_UNSUPPORTED SqlOuterJoinsSupportLevel = 0 + SqlOuterJoinsSupportLevel_SQL_LIMITED_OUTER_JOINS SqlOuterJoinsSupportLevel = 1 + SqlOuterJoinsSupportLevel_SQL_FULL_OUTER_JOINS SqlOuterJoinsSupportLevel = 2 +) + +// Enum value maps for SqlOuterJoinsSupportLevel. 
+var ( + SqlOuterJoinsSupportLevel_name = map[int32]string{ + 0: "SQL_JOINS_UNSUPPORTED", + 1: "SQL_LIMITED_OUTER_JOINS", + 2: "SQL_FULL_OUTER_JOINS", + } + SqlOuterJoinsSupportLevel_value = map[string]int32{ + "SQL_JOINS_UNSUPPORTED": 0, + "SQL_LIMITED_OUTER_JOINS": 1, + "SQL_FULL_OUTER_JOINS": 2, + } +) + +func (x SqlOuterJoinsSupportLevel) Enum() *SqlOuterJoinsSupportLevel { + p := new(SqlOuterJoinsSupportLevel) + *p = x + return p +} + +func (x SqlOuterJoinsSupportLevel) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (SqlOuterJoinsSupportLevel) Descriptor() protoreflect.EnumDescriptor { + return file_FlightSql_proto_enumTypes[5].Descriptor() +} + +func (SqlOuterJoinsSupportLevel) Type() protoreflect.EnumType { + return &file_FlightSql_proto_enumTypes[5] +} + +func (x SqlOuterJoinsSupportLevel) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use SqlOuterJoinsSupportLevel.Descriptor instead. +func (SqlOuterJoinsSupportLevel) EnumDescriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{5} +} + +type SqlSupportedGroupBy int32 + +const ( + SqlSupportedGroupBy_SQL_GROUP_BY_UNRELATED SqlSupportedGroupBy = 0 + SqlSupportedGroupBy_SQL_GROUP_BY_BEYOND_SELECT SqlSupportedGroupBy = 1 +) + +// Enum value maps for SqlSupportedGroupBy. +var ( + SqlSupportedGroupBy_name = map[int32]string{ + 0: "SQL_GROUP_BY_UNRELATED", + 1: "SQL_GROUP_BY_BEYOND_SELECT", + } + SqlSupportedGroupBy_value = map[string]int32{ + "SQL_GROUP_BY_UNRELATED": 0, + "SQL_GROUP_BY_BEYOND_SELECT": 1, + } +) + +func (x SqlSupportedGroupBy) Enum() *SqlSupportedGroupBy { + p := new(SqlSupportedGroupBy) + *p = x + return p +} + +func (x SqlSupportedGroupBy) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (SqlSupportedGroupBy) Descriptor() protoreflect.EnumDescriptor { + return file_FlightSql_proto_enumTypes[6].Descriptor() +} + +func (SqlSupportedGroupBy) Type() protoreflect.EnumType { + return &file_FlightSql_proto_enumTypes[6] +} + +func (x SqlSupportedGroupBy) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use SqlSupportedGroupBy.Descriptor instead. +func (SqlSupportedGroupBy) EnumDescriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{6} +} + +type SqlSupportedElementActions int32 + +const ( + SqlSupportedElementActions_SQL_ELEMENT_IN_PROCEDURE_CALLS SqlSupportedElementActions = 0 + SqlSupportedElementActions_SQL_ELEMENT_IN_INDEX_DEFINITIONS SqlSupportedElementActions = 1 + SqlSupportedElementActions_SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS SqlSupportedElementActions = 2 +) + +// Enum value maps for SqlSupportedElementActions. 
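// Illustrative sketch, not part of the generated code or of this patch:
// several SqlInfo values above (for example SQL_SUPPORTED_GROUP_BY = 522 and
// SQL_SUPPORTED_GRAMMAR = 525) are documented as int32 bitmasks in which bit
// N corresponds to the enum member with number N. Under that reading, the
// documented example "return 3 (\b11) => [SQL_GROUP_BY_UNRELATED,
// SQL_GROUP_BY_BEYOND_SELECT]" can be decoded like this (helper name made
// up; the result order is unspecified because it iterates a map).
func groupByModesFromBitmask(bitmask int32) []SqlSupportedGroupBy {
	var modes []SqlSupportedGroupBy
	// SqlSupportedGroupBy_name is the generated number-to-name map; keep the
	// enum numbers whose bit is set in the reported bitmask.
	for number := range SqlSupportedGroupBy_name {
		if bitmask&(1<<uint(number)) != 0 {
			modes = append(modes, SqlSupportedGroupBy(number))
		}
	}
	return modes
}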
+var ( + SqlSupportedElementActions_name = map[int32]string{ + 0: "SQL_ELEMENT_IN_PROCEDURE_CALLS", + 1: "SQL_ELEMENT_IN_INDEX_DEFINITIONS", + 2: "SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS", + } + SqlSupportedElementActions_value = map[string]int32{ + "SQL_ELEMENT_IN_PROCEDURE_CALLS": 0, + "SQL_ELEMENT_IN_INDEX_DEFINITIONS": 1, + "SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS": 2, + } +) + +func (x SqlSupportedElementActions) Enum() *SqlSupportedElementActions { + p := new(SqlSupportedElementActions) + *p = x + return p +} + +func (x SqlSupportedElementActions) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (SqlSupportedElementActions) Descriptor() protoreflect.EnumDescriptor { + return file_FlightSql_proto_enumTypes[7].Descriptor() +} + +func (SqlSupportedElementActions) Type() protoreflect.EnumType { + return &file_FlightSql_proto_enumTypes[7] +} + +func (x SqlSupportedElementActions) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use SqlSupportedElementActions.Descriptor instead. +func (SqlSupportedElementActions) EnumDescriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{7} +} + +type SqlSupportedPositionedCommands int32 + +const ( + SqlSupportedPositionedCommands_SQL_POSITIONED_DELETE SqlSupportedPositionedCommands = 0 + SqlSupportedPositionedCommands_SQL_POSITIONED_UPDATE SqlSupportedPositionedCommands = 1 +) + +// Enum value maps for SqlSupportedPositionedCommands. +var ( + SqlSupportedPositionedCommands_name = map[int32]string{ + 0: "SQL_POSITIONED_DELETE", + 1: "SQL_POSITIONED_UPDATE", + } + SqlSupportedPositionedCommands_value = map[string]int32{ + "SQL_POSITIONED_DELETE": 0, + "SQL_POSITIONED_UPDATE": 1, + } +) + +func (x SqlSupportedPositionedCommands) Enum() *SqlSupportedPositionedCommands { + p := new(SqlSupportedPositionedCommands) + *p = x + return p +} + +func (x SqlSupportedPositionedCommands) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (SqlSupportedPositionedCommands) Descriptor() protoreflect.EnumDescriptor { + return file_FlightSql_proto_enumTypes[8].Descriptor() +} + +func (SqlSupportedPositionedCommands) Type() protoreflect.EnumType { + return &file_FlightSql_proto_enumTypes[8] +} + +func (x SqlSupportedPositionedCommands) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use SqlSupportedPositionedCommands.Descriptor instead. +func (SqlSupportedPositionedCommands) EnumDescriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{8} +} + +type SqlSupportedSubqueries int32 + +const ( + SqlSupportedSubqueries_SQL_SUBQUERIES_IN_COMPARISONS SqlSupportedSubqueries = 0 + SqlSupportedSubqueries_SQL_SUBQUERIES_IN_EXISTS SqlSupportedSubqueries = 1 + SqlSupportedSubqueries_SQL_SUBQUERIES_IN_INS SqlSupportedSubqueries = 2 + SqlSupportedSubqueries_SQL_SUBQUERIES_IN_QUANTIFIEDS SqlSupportedSubqueries = 3 +) + +// Enum value maps for SqlSupportedSubqueries. 
+var ( + SqlSupportedSubqueries_name = map[int32]string{ + 0: "SQL_SUBQUERIES_IN_COMPARISONS", + 1: "SQL_SUBQUERIES_IN_EXISTS", + 2: "SQL_SUBQUERIES_IN_INS", + 3: "SQL_SUBQUERIES_IN_QUANTIFIEDS", + } + SqlSupportedSubqueries_value = map[string]int32{ + "SQL_SUBQUERIES_IN_COMPARISONS": 0, + "SQL_SUBQUERIES_IN_EXISTS": 1, + "SQL_SUBQUERIES_IN_INS": 2, + "SQL_SUBQUERIES_IN_QUANTIFIEDS": 3, + } +) + +func (x SqlSupportedSubqueries) Enum() *SqlSupportedSubqueries { + p := new(SqlSupportedSubqueries) + *p = x + return p +} + +func (x SqlSupportedSubqueries) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (SqlSupportedSubqueries) Descriptor() protoreflect.EnumDescriptor { + return file_FlightSql_proto_enumTypes[9].Descriptor() +} + +func (SqlSupportedSubqueries) Type() protoreflect.EnumType { + return &file_FlightSql_proto_enumTypes[9] +} + +func (x SqlSupportedSubqueries) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use SqlSupportedSubqueries.Descriptor instead. +func (SqlSupportedSubqueries) EnumDescriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{9} +} + +type SqlSupportedUnions int32 + +const ( + SqlSupportedUnions_SQL_UNION SqlSupportedUnions = 0 + SqlSupportedUnions_SQL_UNION_ALL SqlSupportedUnions = 1 +) + +// Enum value maps for SqlSupportedUnions. +var ( + SqlSupportedUnions_name = map[int32]string{ + 0: "SQL_UNION", + 1: "SQL_UNION_ALL", + } + SqlSupportedUnions_value = map[string]int32{ + "SQL_UNION": 0, + "SQL_UNION_ALL": 1, + } +) + +func (x SqlSupportedUnions) Enum() *SqlSupportedUnions { + p := new(SqlSupportedUnions) + *p = x + return p +} + +func (x SqlSupportedUnions) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (SqlSupportedUnions) Descriptor() protoreflect.EnumDescriptor { + return file_FlightSql_proto_enumTypes[10].Descriptor() +} + +func (SqlSupportedUnions) Type() protoreflect.EnumType { + return &file_FlightSql_proto_enumTypes[10] +} + +func (x SqlSupportedUnions) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use SqlSupportedUnions.Descriptor instead. +func (SqlSupportedUnions) EnumDescriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{10} +} + +type SqlTransactionIsolationLevel int32 + +const ( + SqlTransactionIsolationLevel_SQL_TRANSACTION_NONE SqlTransactionIsolationLevel = 0 + SqlTransactionIsolationLevel_SQL_TRANSACTION_READ_UNCOMMITTED SqlTransactionIsolationLevel = 1 + SqlTransactionIsolationLevel_SQL_TRANSACTION_READ_COMMITTED SqlTransactionIsolationLevel = 2 + SqlTransactionIsolationLevel_SQL_TRANSACTION_REPEATABLE_READ SqlTransactionIsolationLevel = 3 + SqlTransactionIsolationLevel_SQL_TRANSACTION_SERIALIZABLE SqlTransactionIsolationLevel = 4 +) + +// Enum value maps for SqlTransactionIsolationLevel. 
+var ( + SqlTransactionIsolationLevel_name = map[int32]string{ + 0: "SQL_TRANSACTION_NONE", + 1: "SQL_TRANSACTION_READ_UNCOMMITTED", + 2: "SQL_TRANSACTION_READ_COMMITTED", + 3: "SQL_TRANSACTION_REPEATABLE_READ", + 4: "SQL_TRANSACTION_SERIALIZABLE", + } + SqlTransactionIsolationLevel_value = map[string]int32{ + "SQL_TRANSACTION_NONE": 0, + "SQL_TRANSACTION_READ_UNCOMMITTED": 1, + "SQL_TRANSACTION_READ_COMMITTED": 2, + "SQL_TRANSACTION_REPEATABLE_READ": 3, + "SQL_TRANSACTION_SERIALIZABLE": 4, + } +) + +func (x SqlTransactionIsolationLevel) Enum() *SqlTransactionIsolationLevel { + p := new(SqlTransactionIsolationLevel) + *p = x + return p +} + +func (x SqlTransactionIsolationLevel) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (SqlTransactionIsolationLevel) Descriptor() protoreflect.EnumDescriptor { + return file_FlightSql_proto_enumTypes[11].Descriptor() +} + +func (SqlTransactionIsolationLevel) Type() protoreflect.EnumType { + return &file_FlightSql_proto_enumTypes[11] +} + +func (x SqlTransactionIsolationLevel) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use SqlTransactionIsolationLevel.Descriptor instead. +func (SqlTransactionIsolationLevel) EnumDescriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{11} +} + +type SqlSupportedTransactions int32 + +const ( + SqlSupportedTransactions_SQL_TRANSACTION_UNSPECIFIED SqlSupportedTransactions = 0 + SqlSupportedTransactions_SQL_DATA_DEFINITION_TRANSACTIONS SqlSupportedTransactions = 1 + SqlSupportedTransactions_SQL_DATA_MANIPULATION_TRANSACTIONS SqlSupportedTransactions = 2 +) + +// Enum value maps for SqlSupportedTransactions. +var ( + SqlSupportedTransactions_name = map[int32]string{ + 0: "SQL_TRANSACTION_UNSPECIFIED", + 1: "SQL_DATA_DEFINITION_TRANSACTIONS", + 2: "SQL_DATA_MANIPULATION_TRANSACTIONS", + } + SqlSupportedTransactions_value = map[string]int32{ + "SQL_TRANSACTION_UNSPECIFIED": 0, + "SQL_DATA_DEFINITION_TRANSACTIONS": 1, + "SQL_DATA_MANIPULATION_TRANSACTIONS": 2, + } +) + +func (x SqlSupportedTransactions) Enum() *SqlSupportedTransactions { + p := new(SqlSupportedTransactions) + *p = x + return p +} + +func (x SqlSupportedTransactions) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (SqlSupportedTransactions) Descriptor() protoreflect.EnumDescriptor { + return file_FlightSql_proto_enumTypes[12].Descriptor() +} + +func (SqlSupportedTransactions) Type() protoreflect.EnumType { + return &file_FlightSql_proto_enumTypes[12] +} + +func (x SqlSupportedTransactions) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use SqlSupportedTransactions.Descriptor instead. +func (SqlSupportedTransactions) EnumDescriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{12} +} + +type SqlSupportedResultSetType int32 + +const ( + SqlSupportedResultSetType_SQL_RESULT_SET_TYPE_UNSPECIFIED SqlSupportedResultSetType = 0 + SqlSupportedResultSetType_SQL_RESULT_SET_TYPE_FORWARD_ONLY SqlSupportedResultSetType = 1 + SqlSupportedResultSetType_SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE SqlSupportedResultSetType = 2 + SqlSupportedResultSetType_SQL_RESULT_SET_TYPE_SCROLL_SENSITIVE SqlSupportedResultSetType = 3 +) + +// Enum value maps for SqlSupportedResultSetType. 
+var ( + SqlSupportedResultSetType_name = map[int32]string{ + 0: "SQL_RESULT_SET_TYPE_UNSPECIFIED", + 1: "SQL_RESULT_SET_TYPE_FORWARD_ONLY", + 2: "SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE", + 3: "SQL_RESULT_SET_TYPE_SCROLL_SENSITIVE", + } + SqlSupportedResultSetType_value = map[string]int32{ + "SQL_RESULT_SET_TYPE_UNSPECIFIED": 0, + "SQL_RESULT_SET_TYPE_FORWARD_ONLY": 1, + "SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE": 2, + "SQL_RESULT_SET_TYPE_SCROLL_SENSITIVE": 3, + } +) + +func (x SqlSupportedResultSetType) Enum() *SqlSupportedResultSetType { + p := new(SqlSupportedResultSetType) + *p = x + return p +} + +func (x SqlSupportedResultSetType) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (SqlSupportedResultSetType) Descriptor() protoreflect.EnumDescriptor { + return file_FlightSql_proto_enumTypes[13].Descriptor() +} + +func (SqlSupportedResultSetType) Type() protoreflect.EnumType { + return &file_FlightSql_proto_enumTypes[13] +} + +func (x SqlSupportedResultSetType) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use SqlSupportedResultSetType.Descriptor instead. +func (SqlSupportedResultSetType) EnumDescriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{13} +} + +type SqlSupportedResultSetConcurrency int32 + +const ( + SqlSupportedResultSetConcurrency_SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED SqlSupportedResultSetConcurrency = 0 + SqlSupportedResultSetConcurrency_SQL_RESULT_SET_CONCURRENCY_READ_ONLY SqlSupportedResultSetConcurrency = 1 + SqlSupportedResultSetConcurrency_SQL_RESULT_SET_CONCURRENCY_UPDATABLE SqlSupportedResultSetConcurrency = 2 +) + +// Enum value maps for SqlSupportedResultSetConcurrency. +var ( + SqlSupportedResultSetConcurrency_name = map[int32]string{ + 0: "SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED", + 1: "SQL_RESULT_SET_CONCURRENCY_READ_ONLY", + 2: "SQL_RESULT_SET_CONCURRENCY_UPDATABLE", + } + SqlSupportedResultSetConcurrency_value = map[string]int32{ + "SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED": 0, + "SQL_RESULT_SET_CONCURRENCY_READ_ONLY": 1, + "SQL_RESULT_SET_CONCURRENCY_UPDATABLE": 2, + } +) + +func (x SqlSupportedResultSetConcurrency) Enum() *SqlSupportedResultSetConcurrency { + p := new(SqlSupportedResultSetConcurrency) + *p = x + return p +} + +func (x SqlSupportedResultSetConcurrency) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (SqlSupportedResultSetConcurrency) Descriptor() protoreflect.EnumDescriptor { + return file_FlightSql_proto_enumTypes[14].Descriptor() +} + +func (SqlSupportedResultSetConcurrency) Type() protoreflect.EnumType { + return &file_FlightSql_proto_enumTypes[14] +} + +func (x SqlSupportedResultSetConcurrency) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use SqlSupportedResultSetConcurrency.Descriptor instead. 
+func (SqlSupportedResultSetConcurrency) EnumDescriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{14} +} + +type SqlSupportsConvert int32 + +const ( + SqlSupportsConvert_SQL_CONVERT_BIGINT SqlSupportsConvert = 0 + SqlSupportsConvert_SQL_CONVERT_BINARY SqlSupportsConvert = 1 + SqlSupportsConvert_SQL_CONVERT_BIT SqlSupportsConvert = 2 + SqlSupportsConvert_SQL_CONVERT_CHAR SqlSupportsConvert = 3 + SqlSupportsConvert_SQL_CONVERT_DATE SqlSupportsConvert = 4 + SqlSupportsConvert_SQL_CONVERT_DECIMAL SqlSupportsConvert = 5 + SqlSupportsConvert_SQL_CONVERT_FLOAT SqlSupportsConvert = 6 + SqlSupportsConvert_SQL_CONVERT_INTEGER SqlSupportsConvert = 7 + SqlSupportsConvert_SQL_CONVERT_INTERVAL_DAY_TIME SqlSupportsConvert = 8 + SqlSupportsConvert_SQL_CONVERT_INTERVAL_YEAR_MONTH SqlSupportsConvert = 9 + SqlSupportsConvert_SQL_CONVERT_LONGVARBINARY SqlSupportsConvert = 10 + SqlSupportsConvert_SQL_CONVERT_LONGVARCHAR SqlSupportsConvert = 11 + SqlSupportsConvert_SQL_CONVERT_NUMERIC SqlSupportsConvert = 12 + SqlSupportsConvert_SQL_CONVERT_REAL SqlSupportsConvert = 13 + SqlSupportsConvert_SQL_CONVERT_SMALLINT SqlSupportsConvert = 14 + SqlSupportsConvert_SQL_CONVERT_TIME SqlSupportsConvert = 15 + SqlSupportsConvert_SQL_CONVERT_TIMESTAMP SqlSupportsConvert = 16 + SqlSupportsConvert_SQL_CONVERT_TINYINT SqlSupportsConvert = 17 + SqlSupportsConvert_SQL_CONVERT_VARBINARY SqlSupportsConvert = 18 + SqlSupportsConvert_SQL_CONVERT_VARCHAR SqlSupportsConvert = 19 +) + +// Enum value maps for SqlSupportsConvert. +var ( + SqlSupportsConvert_name = map[int32]string{ + 0: "SQL_CONVERT_BIGINT", + 1: "SQL_CONVERT_BINARY", + 2: "SQL_CONVERT_BIT", + 3: "SQL_CONVERT_CHAR", + 4: "SQL_CONVERT_DATE", + 5: "SQL_CONVERT_DECIMAL", + 6: "SQL_CONVERT_FLOAT", + 7: "SQL_CONVERT_INTEGER", + 8: "SQL_CONVERT_INTERVAL_DAY_TIME", + 9: "SQL_CONVERT_INTERVAL_YEAR_MONTH", + 10: "SQL_CONVERT_LONGVARBINARY", + 11: "SQL_CONVERT_LONGVARCHAR", + 12: "SQL_CONVERT_NUMERIC", + 13: "SQL_CONVERT_REAL", + 14: "SQL_CONVERT_SMALLINT", + 15: "SQL_CONVERT_TIME", + 16: "SQL_CONVERT_TIMESTAMP", + 17: "SQL_CONVERT_TINYINT", + 18: "SQL_CONVERT_VARBINARY", + 19: "SQL_CONVERT_VARCHAR", + } + SqlSupportsConvert_value = map[string]int32{ + "SQL_CONVERT_BIGINT": 0, + "SQL_CONVERT_BINARY": 1, + "SQL_CONVERT_BIT": 2, + "SQL_CONVERT_CHAR": 3, + "SQL_CONVERT_DATE": 4, + "SQL_CONVERT_DECIMAL": 5, + "SQL_CONVERT_FLOAT": 6, + "SQL_CONVERT_INTEGER": 7, + "SQL_CONVERT_INTERVAL_DAY_TIME": 8, + "SQL_CONVERT_INTERVAL_YEAR_MONTH": 9, + "SQL_CONVERT_LONGVARBINARY": 10, + "SQL_CONVERT_LONGVARCHAR": 11, + "SQL_CONVERT_NUMERIC": 12, + "SQL_CONVERT_REAL": 13, + "SQL_CONVERT_SMALLINT": 14, + "SQL_CONVERT_TIME": 15, + "SQL_CONVERT_TIMESTAMP": 16, + "SQL_CONVERT_TINYINT": 17, + "SQL_CONVERT_VARBINARY": 18, + "SQL_CONVERT_VARCHAR": 19, + } +) + +func (x SqlSupportsConvert) Enum() *SqlSupportsConvert { + p := new(SqlSupportsConvert) + *p = x + return p +} + +func (x SqlSupportsConvert) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (SqlSupportsConvert) Descriptor() protoreflect.EnumDescriptor { + return file_FlightSql_proto_enumTypes[15].Descriptor() +} + +func (SqlSupportsConvert) Type() protoreflect.EnumType { + return &file_FlightSql_proto_enumTypes[15] +} + +func (x SqlSupportsConvert) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use SqlSupportsConvert.Descriptor instead. 
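For readers skimming this generated file: every enum above ships with a _name/_value map pair plus Enum(), Number() and String() helpers emitted by protoc-gen-go. A minimal usage sketch for SqlSupportsConvert follows; the import path is a placeholder, not the real location of the generated package.

package main

import (
	"fmt"

	pb "example.com/flightsql" // placeholder: the package generated from FlightSql.proto
)

func main() {
	// The generated maps translate between wire numbers and SCREAMING_SNAKE names.
	num := pb.SqlSupportsConvert_value["SQL_CONVERT_VARCHAR"] // 19
	name := pb.SqlSupportsConvert_name[num]                   // "SQL_CONVERT_VARCHAR"

	// Enum() returns a pointer, which is the shape optional proto3 fields expect.
	v := pb.SqlSupportsConvert_SQL_CONVERT_VARCHAR.Enum()
	fmt.Println(num, name, v.Number(), v.String())
}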
+func (SqlSupportsConvert) EnumDescriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{15} +} + +//* +// The JDBC/ODBC-defined type of any object. +// All the values here are the sames as in the JDBC and ODBC specs. +type XdbcDataType int32 + +const ( + XdbcDataType_XDBC_UNKNOWN_TYPE XdbcDataType = 0 + XdbcDataType_XDBC_CHAR XdbcDataType = 1 + XdbcDataType_XDBC_NUMERIC XdbcDataType = 2 + XdbcDataType_XDBC_DECIMAL XdbcDataType = 3 + XdbcDataType_XDBC_INTEGER XdbcDataType = 4 + XdbcDataType_XDBC_SMALLINT XdbcDataType = 5 + XdbcDataType_XDBC_FLOAT XdbcDataType = 6 + XdbcDataType_XDBC_REAL XdbcDataType = 7 + XdbcDataType_XDBC_DOUBLE XdbcDataType = 8 + XdbcDataType_XDBC_DATETIME XdbcDataType = 9 + XdbcDataType_XDBC_INTERVAL XdbcDataType = 10 + XdbcDataType_XDBC_VARCHAR XdbcDataType = 12 + XdbcDataType_XDBC_DATE XdbcDataType = 91 + XdbcDataType_XDBC_TIME XdbcDataType = 92 + XdbcDataType_XDBC_TIMESTAMP XdbcDataType = 93 + XdbcDataType_XDBC_LONGVARCHAR XdbcDataType = -1 + XdbcDataType_XDBC_BINARY XdbcDataType = -2 + XdbcDataType_XDBC_VARBINARY XdbcDataType = -3 + XdbcDataType_XDBC_LONGVARBINARY XdbcDataType = -4 + XdbcDataType_XDBC_BIGINT XdbcDataType = -5 + XdbcDataType_XDBC_TINYINT XdbcDataType = -6 + XdbcDataType_XDBC_BIT XdbcDataType = -7 + XdbcDataType_XDBC_WCHAR XdbcDataType = -8 + XdbcDataType_XDBC_WVARCHAR XdbcDataType = -9 +) + +// Enum value maps for XdbcDataType. +var ( + XdbcDataType_name = map[int32]string{ + 0: "XDBC_UNKNOWN_TYPE", + 1: "XDBC_CHAR", + 2: "XDBC_NUMERIC", + 3: "XDBC_DECIMAL", + 4: "XDBC_INTEGER", + 5: "XDBC_SMALLINT", + 6: "XDBC_FLOAT", + 7: "XDBC_REAL", + 8: "XDBC_DOUBLE", + 9: "XDBC_DATETIME", + 10: "XDBC_INTERVAL", + 12: "XDBC_VARCHAR", + 91: "XDBC_DATE", + 92: "XDBC_TIME", + 93: "XDBC_TIMESTAMP", + -1: "XDBC_LONGVARCHAR", + -2: "XDBC_BINARY", + -3: "XDBC_VARBINARY", + -4: "XDBC_LONGVARBINARY", + -5: "XDBC_BIGINT", + -6: "XDBC_TINYINT", + -7: "XDBC_BIT", + -8: "XDBC_WCHAR", + -9: "XDBC_WVARCHAR", + } + XdbcDataType_value = map[string]int32{ + "XDBC_UNKNOWN_TYPE": 0, + "XDBC_CHAR": 1, + "XDBC_NUMERIC": 2, + "XDBC_DECIMAL": 3, + "XDBC_INTEGER": 4, + "XDBC_SMALLINT": 5, + "XDBC_FLOAT": 6, + "XDBC_REAL": 7, + "XDBC_DOUBLE": 8, + "XDBC_DATETIME": 9, + "XDBC_INTERVAL": 10, + "XDBC_VARCHAR": 12, + "XDBC_DATE": 91, + "XDBC_TIME": 92, + "XDBC_TIMESTAMP": 93, + "XDBC_LONGVARCHAR": -1, + "XDBC_BINARY": -2, + "XDBC_VARBINARY": -3, + "XDBC_LONGVARBINARY": -4, + "XDBC_BIGINT": -5, + "XDBC_TINYINT": -6, + "XDBC_BIT": -7, + "XDBC_WCHAR": -8, + "XDBC_WVARCHAR": -9, + } +) + +func (x XdbcDataType) Enum() *XdbcDataType { + p := new(XdbcDataType) + *p = x + return p +} + +func (x XdbcDataType) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (XdbcDataType) Descriptor() protoreflect.EnumDescriptor { + return file_FlightSql_proto_enumTypes[16].Descriptor() +} + +func (XdbcDataType) Type() protoreflect.EnumType { + return &file_FlightSql_proto_enumTypes[16] +} + +func (x XdbcDataType) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use XdbcDataType.Descriptor instead. +func (XdbcDataType) EnumDescriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{16} +} + +//* +// Detailed subtype information for XDBC_TYPE_DATETIME and XDBC_TYPE_INTERVAL. 
+type XdbcDatetimeSubcode int32 + +const ( + XdbcDatetimeSubcode_XDBC_SUBCODE_UNKNOWN XdbcDatetimeSubcode = 0 + XdbcDatetimeSubcode_XDBC_SUBCODE_YEAR XdbcDatetimeSubcode = 1 + XdbcDatetimeSubcode_XDBC_SUBCODE_DATE XdbcDatetimeSubcode = 1 + XdbcDatetimeSubcode_XDBC_SUBCODE_TIME XdbcDatetimeSubcode = 2 + XdbcDatetimeSubcode_XDBC_SUBCODE_MONTH XdbcDatetimeSubcode = 2 + XdbcDatetimeSubcode_XDBC_SUBCODE_TIMESTAMP XdbcDatetimeSubcode = 3 + XdbcDatetimeSubcode_XDBC_SUBCODE_DAY XdbcDatetimeSubcode = 3 + XdbcDatetimeSubcode_XDBC_SUBCODE_TIME_WITH_TIMEZONE XdbcDatetimeSubcode = 4 + XdbcDatetimeSubcode_XDBC_SUBCODE_HOUR XdbcDatetimeSubcode = 4 + XdbcDatetimeSubcode_XDBC_SUBCODE_TIMESTAMP_WITH_TIMEZONE XdbcDatetimeSubcode = 5 + XdbcDatetimeSubcode_XDBC_SUBCODE_MINUTE XdbcDatetimeSubcode = 5 + XdbcDatetimeSubcode_XDBC_SUBCODE_SECOND XdbcDatetimeSubcode = 6 + XdbcDatetimeSubcode_XDBC_SUBCODE_YEAR_TO_MONTH XdbcDatetimeSubcode = 7 + XdbcDatetimeSubcode_XDBC_SUBCODE_DAY_TO_HOUR XdbcDatetimeSubcode = 8 + XdbcDatetimeSubcode_XDBC_SUBCODE_DAY_TO_MINUTE XdbcDatetimeSubcode = 9 + XdbcDatetimeSubcode_XDBC_SUBCODE_DAY_TO_SECOND XdbcDatetimeSubcode = 10 + XdbcDatetimeSubcode_XDBC_SUBCODE_HOUR_TO_MINUTE XdbcDatetimeSubcode = 11 + XdbcDatetimeSubcode_XDBC_SUBCODE_HOUR_TO_SECOND XdbcDatetimeSubcode = 12 + XdbcDatetimeSubcode_XDBC_SUBCODE_MINUTE_TO_SECOND XdbcDatetimeSubcode = 13 + XdbcDatetimeSubcode_XDBC_SUBCODE_INTERVAL_YEAR XdbcDatetimeSubcode = 101 + XdbcDatetimeSubcode_XDBC_SUBCODE_INTERVAL_MONTH XdbcDatetimeSubcode = 102 + XdbcDatetimeSubcode_XDBC_SUBCODE_INTERVAL_DAY XdbcDatetimeSubcode = 103 + XdbcDatetimeSubcode_XDBC_SUBCODE_INTERVAL_HOUR XdbcDatetimeSubcode = 104 + XdbcDatetimeSubcode_XDBC_SUBCODE_INTERVAL_MINUTE XdbcDatetimeSubcode = 105 + XdbcDatetimeSubcode_XDBC_SUBCODE_INTERVAL_SECOND XdbcDatetimeSubcode = 106 + XdbcDatetimeSubcode_XDBC_SUBCODE_INTERVAL_YEAR_TO_MONTH XdbcDatetimeSubcode = 107 + XdbcDatetimeSubcode_XDBC_SUBCODE_INTERVAL_DAY_TO_HOUR XdbcDatetimeSubcode = 108 + XdbcDatetimeSubcode_XDBC_SUBCODE_INTERVAL_DAY_TO_MINUTE XdbcDatetimeSubcode = 109 + XdbcDatetimeSubcode_XDBC_SUBCODE_INTERVAL_DAY_TO_SECOND XdbcDatetimeSubcode = 110 + XdbcDatetimeSubcode_XDBC_SUBCODE_INTERVAL_HOUR_TO_MINUTE XdbcDatetimeSubcode = 111 + XdbcDatetimeSubcode_XDBC_SUBCODE_INTERVAL_HOUR_TO_SECOND XdbcDatetimeSubcode = 112 + XdbcDatetimeSubcode_XDBC_SUBCODE_INTERVAL_MINUTE_TO_SECOND XdbcDatetimeSubcode = 113 +) + +// Enum value maps for XdbcDatetimeSubcode. 
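Note that XdbcDatetimeSubcode deliberately reuses numbers (YEAR and DATE are both 1, TIME and MONTH are both 2, and so on), mirroring the ODBC subcode tables; the value maps declared next therefore comment out the duplicate entries. A small sketch of the practical consequence, again with a placeholder import path:

package main

import (
	"fmt"

	pb "example.com/flightsql" // placeholder: the package generated from FlightSql.proto
)

func main() {
	// Both names resolve to the same wire value...
	fmt.Println(pb.XdbcDatetimeSubcode_value["XDBC_SUBCODE_YEAR"]) // 1
	fmt.Println(pb.XdbcDatetimeSubcode_value["XDBC_SUBCODE_DATE"]) // 1

	// ...so String() on the alias reports the first-declared name.
	fmt.Println(pb.XdbcDatetimeSubcode_XDBC_SUBCODE_DATE.String()) // XDBC_SUBCODE_YEAR
}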
+var ( + XdbcDatetimeSubcode_name = map[int32]string{ + 0: "XDBC_SUBCODE_UNKNOWN", + 1: "XDBC_SUBCODE_YEAR", + // Duplicate value: 1: "XDBC_SUBCODE_DATE", + 2: "XDBC_SUBCODE_TIME", + // Duplicate value: 2: "XDBC_SUBCODE_MONTH", + 3: "XDBC_SUBCODE_TIMESTAMP", + // Duplicate value: 3: "XDBC_SUBCODE_DAY", + 4: "XDBC_SUBCODE_TIME_WITH_TIMEZONE", + // Duplicate value: 4: "XDBC_SUBCODE_HOUR", + 5: "XDBC_SUBCODE_TIMESTAMP_WITH_TIMEZONE", + // Duplicate value: 5: "XDBC_SUBCODE_MINUTE", + 6: "XDBC_SUBCODE_SECOND", + 7: "XDBC_SUBCODE_YEAR_TO_MONTH", + 8: "XDBC_SUBCODE_DAY_TO_HOUR", + 9: "XDBC_SUBCODE_DAY_TO_MINUTE", + 10: "XDBC_SUBCODE_DAY_TO_SECOND", + 11: "XDBC_SUBCODE_HOUR_TO_MINUTE", + 12: "XDBC_SUBCODE_HOUR_TO_SECOND", + 13: "XDBC_SUBCODE_MINUTE_TO_SECOND", + 101: "XDBC_SUBCODE_INTERVAL_YEAR", + 102: "XDBC_SUBCODE_INTERVAL_MONTH", + 103: "XDBC_SUBCODE_INTERVAL_DAY", + 104: "XDBC_SUBCODE_INTERVAL_HOUR", + 105: "XDBC_SUBCODE_INTERVAL_MINUTE", + 106: "XDBC_SUBCODE_INTERVAL_SECOND", + 107: "XDBC_SUBCODE_INTERVAL_YEAR_TO_MONTH", + 108: "XDBC_SUBCODE_INTERVAL_DAY_TO_HOUR", + 109: "XDBC_SUBCODE_INTERVAL_DAY_TO_MINUTE", + 110: "XDBC_SUBCODE_INTERVAL_DAY_TO_SECOND", + 111: "XDBC_SUBCODE_INTERVAL_HOUR_TO_MINUTE", + 112: "XDBC_SUBCODE_INTERVAL_HOUR_TO_SECOND", + 113: "XDBC_SUBCODE_INTERVAL_MINUTE_TO_SECOND", + } + XdbcDatetimeSubcode_value = map[string]int32{ + "XDBC_SUBCODE_UNKNOWN": 0, + "XDBC_SUBCODE_YEAR": 1, + "XDBC_SUBCODE_DATE": 1, + "XDBC_SUBCODE_TIME": 2, + "XDBC_SUBCODE_MONTH": 2, + "XDBC_SUBCODE_TIMESTAMP": 3, + "XDBC_SUBCODE_DAY": 3, + "XDBC_SUBCODE_TIME_WITH_TIMEZONE": 4, + "XDBC_SUBCODE_HOUR": 4, + "XDBC_SUBCODE_TIMESTAMP_WITH_TIMEZONE": 5, + "XDBC_SUBCODE_MINUTE": 5, + "XDBC_SUBCODE_SECOND": 6, + "XDBC_SUBCODE_YEAR_TO_MONTH": 7, + "XDBC_SUBCODE_DAY_TO_HOUR": 8, + "XDBC_SUBCODE_DAY_TO_MINUTE": 9, + "XDBC_SUBCODE_DAY_TO_SECOND": 10, + "XDBC_SUBCODE_HOUR_TO_MINUTE": 11, + "XDBC_SUBCODE_HOUR_TO_SECOND": 12, + "XDBC_SUBCODE_MINUTE_TO_SECOND": 13, + "XDBC_SUBCODE_INTERVAL_YEAR": 101, + "XDBC_SUBCODE_INTERVAL_MONTH": 102, + "XDBC_SUBCODE_INTERVAL_DAY": 103, + "XDBC_SUBCODE_INTERVAL_HOUR": 104, + "XDBC_SUBCODE_INTERVAL_MINUTE": 105, + "XDBC_SUBCODE_INTERVAL_SECOND": 106, + "XDBC_SUBCODE_INTERVAL_YEAR_TO_MONTH": 107, + "XDBC_SUBCODE_INTERVAL_DAY_TO_HOUR": 108, + "XDBC_SUBCODE_INTERVAL_DAY_TO_MINUTE": 109, + "XDBC_SUBCODE_INTERVAL_DAY_TO_SECOND": 110, + "XDBC_SUBCODE_INTERVAL_HOUR_TO_MINUTE": 111, + "XDBC_SUBCODE_INTERVAL_HOUR_TO_SECOND": 112, + "XDBC_SUBCODE_INTERVAL_MINUTE_TO_SECOND": 113, + } +) + +func (x XdbcDatetimeSubcode) Enum() *XdbcDatetimeSubcode { + p := new(XdbcDatetimeSubcode) + *p = x + return p +} + +func (x XdbcDatetimeSubcode) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (XdbcDatetimeSubcode) Descriptor() protoreflect.EnumDescriptor { + return file_FlightSql_proto_enumTypes[17].Descriptor() +} + +func (XdbcDatetimeSubcode) Type() protoreflect.EnumType { + return &file_FlightSql_proto_enumTypes[17] +} + +func (x XdbcDatetimeSubcode) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use XdbcDatetimeSubcode.Descriptor instead. +func (XdbcDatetimeSubcode) EnumDescriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{17} +} + +type Nullable int32 + +const ( + //* + // Indicates that the fields does not allow the use of null values. + Nullable_NULLABILITY_NO_NULLS Nullable = 0 + //* + // Indicates that the fields allow the use of null values. 
+	Nullable_NULLABILITY_NULLABLE Nullable = 1
+	//*
+	// Indicates that the nullability of the fields cannot be determined.
+	Nullable_NULLABILITY_UNKNOWN Nullable = 2
+)
+
+// Enum value maps for Nullable.
+var (
+	Nullable_name = map[int32]string{
+		0: "NULLABILITY_NO_NULLS",
+		1: "NULLABILITY_NULLABLE",
+		2: "NULLABILITY_UNKNOWN",
+	}
+	Nullable_value = map[string]int32{
+		"NULLABILITY_NO_NULLS": 0,
+		"NULLABILITY_NULLABLE": 1,
+		"NULLABILITY_UNKNOWN":  2,
+	}
+)
+
+func (x Nullable) Enum() *Nullable {
+	p := new(Nullable)
+	*p = x
+	return p
+}
+
+func (x Nullable) String() string {
+	return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x))
+}
+
+func (Nullable) Descriptor() protoreflect.EnumDescriptor {
+	return file_FlightSql_proto_enumTypes[18].Descriptor()
+}
+
+func (Nullable) Type() protoreflect.EnumType {
+	return &file_FlightSql_proto_enumTypes[18]
+}
+
+func (x Nullable) Number() protoreflect.EnumNumber {
+	return protoreflect.EnumNumber(x)
+}
+
+// Deprecated: Use Nullable.Descriptor instead.
+func (Nullable) EnumDescriptor() ([]byte, []int) {
+	return file_FlightSql_proto_rawDescGZIP(), []int{18}
+}
+
+type Searchable int32
+
+const (
+	//*
+	// Indicates that the column cannot be used in a WHERE clause.
+	Searchable_SEARCHABLE_NONE Searchable = 0
+	//*
+	// Indicates that the column can be used in a WHERE clause if it is using a
+	// LIKE operator.
+	Searchable_SEARCHABLE_CHAR Searchable = 1
+	//*
+	// Indicates that the column can be used in a WHERE clause with any
+	// operator other than LIKE.
+	//
+	//  - Allowed operators: comparison, quantified comparison, BETWEEN,
+	//    DISTINCT, IN, MATCH, and UNIQUE.
+	Searchable_SEARCHABLE_BASIC Searchable = 2
+	//*
+	// Indicates that the column can be used in a WHERE clause using any operator.
+	Searchable_SEARCHABLE_FULL Searchable = 3
+)
+
+// Enum value maps for Searchable.
+var (
+	Searchable_name = map[int32]string{
+		0: "SEARCHABLE_NONE",
+		1: "SEARCHABLE_CHAR",
+		2: "SEARCHABLE_BASIC",
+		3: "SEARCHABLE_FULL",
+	}
+	Searchable_value = map[string]int32{
+		"SEARCHABLE_NONE":  0,
+		"SEARCHABLE_CHAR":  1,
+		"SEARCHABLE_BASIC": 2,
+		"SEARCHABLE_FULL":  3,
+	}
+)
+
+func (x Searchable) Enum() *Searchable {
+	p := new(Searchable)
+	*p = x
+	return p
+}
+
+func (x Searchable) String() string {
+	return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x))
+}
+
+func (Searchable) Descriptor() protoreflect.EnumDescriptor {
+	return file_FlightSql_proto_enumTypes[19].Descriptor()
+}
+
+func (Searchable) Type() protoreflect.EnumType {
+	return &file_FlightSql_proto_enumTypes[19]
+}
+
+func (x Searchable) Number() protoreflect.EnumNumber {
+	return protoreflect.EnumNumber(x)
+}
+
+// Deprecated: Use Searchable.Descriptor instead.
+func (Searchable) EnumDescriptor() ([]byte, []int) {
+	return file_FlightSql_proto_rawDescGZIP(), []int{19}
+}
+
+type UpdateDeleteRules int32
+
+const (
+	UpdateDeleteRules_CASCADE     UpdateDeleteRules = 0
+	UpdateDeleteRules_RESTRICT    UpdateDeleteRules = 1
+	UpdateDeleteRules_SET_NULL    UpdateDeleteRules = 2
+	UpdateDeleteRules_NO_ACTION   UpdateDeleteRules = 3
+	UpdateDeleteRules_SET_DEFAULT UpdateDeleteRules = 4
+)
+
+// Enum value maps for UpdateDeleteRules.
+var ( + UpdateDeleteRules_name = map[int32]string{ + 0: "CASCADE", + 1: "RESTRICT", + 2: "SET_NULL", + 3: "NO_ACTION", + 4: "SET_DEFAULT", + } + UpdateDeleteRules_value = map[string]int32{ + "CASCADE": 0, + "RESTRICT": 1, + "SET_NULL": 2, + "NO_ACTION": 3, + "SET_DEFAULT": 4, + } +) + +func (x UpdateDeleteRules) Enum() *UpdateDeleteRules { + p := new(UpdateDeleteRules) + *p = x + return p +} + +func (x UpdateDeleteRules) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (UpdateDeleteRules) Descriptor() protoreflect.EnumDescriptor { + return file_FlightSql_proto_enumTypes[20].Descriptor() +} + +func (UpdateDeleteRules) Type() protoreflect.EnumType { + return &file_FlightSql_proto_enumTypes[20] +} + +func (x UpdateDeleteRules) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use UpdateDeleteRules.Descriptor instead. +func (UpdateDeleteRules) EnumDescriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{20} +} + +// +// Represents a metadata request. Used in the command member of FlightDescriptor +// for the following RPC calls: +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the metadata request. +// +// The returned Arrow schema will be: +// < +// info_name: uint32 not null, +// value: dense_union< +// string_value: utf8, +// bool_value: bool, +// bigint_value: int64, +// int32_bitmask: int32, +// string_list: list +// int32_to_int32_list_map: map> +// > +// where there is one row per requested piece of metadata information. +type CommandGetSqlInfo struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // + // Values are modelled after ODBC's SQLGetInfo() function. This information is intended to provide + // Flight SQL clients with basic, SQL syntax and SQL functions related information. + // More information types can be added in future releases. + // E.g. more SQL syntax support types, scalar functions support, type conversion support etc. + // + // Note that the set of metadata may expand. + // + // Initially, Flight SQL will support the following information types: + // - Server Information - Range [0-500) + // - Syntax Information - Range [500-1000) + // Range [0-10,000) is reserved for defaults (see SqlInfo enum for default options). + // Custom options should start at 10,000. + // + // If omitted, then all metadata will be retrieved. + // Flight SQL Servers may choose to include additional metadata above and beyond the specified set, however they must + // at least return the specified set. IDs ranging from 0 to 10,000 (exclusive) are reserved for future use. + // If additional metadata is included, the metadata IDs should start from 10,000. 
+ Info []uint32 `protobuf:"varint,1,rep,packed,name=info,proto3" json:"info,omitempty"` +} + +func (x *CommandGetSqlInfo) Reset() { + *x = CommandGetSqlInfo{} + if protoimpl.UnsafeEnabled { + mi := &file_FlightSql_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CommandGetSqlInfo) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CommandGetSqlInfo) ProtoMessage() {} + +func (x *CommandGetSqlInfo) ProtoReflect() protoreflect.Message { + mi := &file_FlightSql_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CommandGetSqlInfo.ProtoReflect.Descriptor instead. +func (*CommandGetSqlInfo) Descriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{0} +} + +func (x *CommandGetSqlInfo) GetInfo() []uint32 { + if x != nil { + return x.Info + } + return nil +} + +// +// Represents a request to retrieve information about data type supported on a Flight SQL enabled backend. +// Used in the command member of FlightDescriptor for the following RPC calls: +// - GetSchema: return the schema of the query. +// - GetFlightInfo: execute the catalog metadata request. +// +// The returned schema will be: +// < +// type_name: utf8 not null (The name of the data type, for example: VARCHAR, INTEGER, etc), +// data_type: int not null (The SQL data type), +// column_size: int (The maximum size supported by that column. +// In case of exact numeric types, this represents the maximum precision. +// In case of string types, this represents the character length. +// In case of datetime data types, this represents the length in characters of the string representation. +// NULL is returned for data types where column size is not applicable.), +// literal_prefix: utf8 (Character or characters used to prefix a literal, NULL is returned for +// data types where a literal prefix is not applicable.), +// literal_suffix: utf8 (Character or characters used to terminate a literal, +// NULL is returned for data types where a literal suffix is not applicable.), +// create_params: list +// (A list of keywords corresponding to which parameters can be used when creating +// a column for that specific type. +// NULL is returned if there are no parameters for the data type definition.), +// nullable: int not null (Shows if the data type accepts a NULL value. The possible values can be seen in the +// Nullable enum.), +// case_sensitive: bool not null (Shows if a character data type is case-sensitive in collations and comparisons), +// searchable: int not null (Shows how the data type is used in a WHERE clause. The possible values can be seen in the +// Searchable enum.), +// unsigned_attribute: bool (Shows if the data type is unsigned. NULL is returned if the attribute is +// not applicable to the data type or the data type is not numeric.), +// fixed_prec_scale: bool not null (Shows if the data type has predefined fixed precision and scale.), +// auto_increment: bool (Shows if the data type is auto incremental. NULL is returned if the attribute +// is not applicable to the data type or the data type is not numeric.), +// local_type_name: utf8 (Localized version of the data source-dependent name of the data type. 
NULL +// is returned if a localized name is not supported by the data source), +// minimum_scale: int (The minimum scale of the data type on the data source. +// If a data type has a fixed scale, the MINIMUM_SCALE and MAXIMUM_SCALE +// columns both contain this value. NULL is returned if scale is not applicable.), +// maximum_scale: int (The maximum scale of the data type on the data source. +// NULL is returned if scale is not applicable.), +// sql_data_type: int not null (The value of the SQL DATA TYPE which has the same values +// as data_type value. Except for interval and datetime, which +// uses generic values. More info about those types can be +// obtained through datetime_subcode. The possible values can be seen +// in the XdbcDataType enum.), +// datetime_subcode: int (Only used when the SQL DATA TYPE is interval or datetime. It contains +// its sub types. For type different from interval and datetime, this value +// is NULL. The possible values can be seen in the XdbcDatetimeSubcode enum.), +// num_prec_radix: int (If the data type is an approximate numeric type, this column contains +// the value 2 to indicate that COLUMN_SIZE specifies a number of bits. For +// exact numeric types, this column contains the value 10 to indicate that +// column size specifies a number of decimal digits. Otherwise, this column is NULL.), +// interval_precision: int (If the data type is an interval data type, then this column contains the value +// of the interval leading precision. Otherwise, this column is NULL. This fields +// is only relevant to be used by ODBC). +// > +// The returned data should be ordered by data_type and then by type_name. +type CommandGetXdbcTypeInfo struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // + // Specifies the data type to search for the info. + DataType *int32 `protobuf:"varint,1,opt,name=data_type,json=dataType,proto3,oneof" json:"data_type,omitempty"` +} + +func (x *CommandGetXdbcTypeInfo) Reset() { + *x = CommandGetXdbcTypeInfo{} + if protoimpl.UnsafeEnabled { + mi := &file_FlightSql_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CommandGetXdbcTypeInfo) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CommandGetXdbcTypeInfo) ProtoMessage() {} + +func (x *CommandGetXdbcTypeInfo) ProtoReflect() protoreflect.Message { + mi := &file_FlightSql_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CommandGetXdbcTypeInfo.ProtoReflect.Descriptor instead. +func (*CommandGetXdbcTypeInfo) Descriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{1} +} + +func (x *CommandGetXdbcTypeInfo) GetDataType() int32 { + if x != nil && x.DataType != nil { + return *x.DataType + } + return 0 +} + +// +// Represents a request to retrieve the list of catalogs on a Flight SQL enabled backend. +// The definition of a catalog depends on vendor/implementation. It is usually the database itself +// Used in the command member of FlightDescriptor for the following RPC calls: +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. 
+// +// The returned Arrow schema will be: +// < +// catalog_name: utf8 not null +// > +// The returned data should be ordered by catalog_name. +type CommandGetCatalogs struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields +} + +func (x *CommandGetCatalogs) Reset() { + *x = CommandGetCatalogs{} + if protoimpl.UnsafeEnabled { + mi := &file_FlightSql_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CommandGetCatalogs) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CommandGetCatalogs) ProtoMessage() {} + +func (x *CommandGetCatalogs) ProtoReflect() protoreflect.Message { + mi := &file_FlightSql_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CommandGetCatalogs.ProtoReflect.Descriptor instead. +func (*CommandGetCatalogs) Descriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{2} +} + +// +// Represents a request to retrieve the list of database schemas on a Flight SQL enabled backend. +// The definition of a database schema depends on vendor/implementation. It is usually a collection of tables. +// Used in the command member of FlightDescriptor for the following RPC calls: +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. +// +// The returned Arrow schema will be: +// < +// catalog_name: utf8, +// db_schema_name: utf8 not null +// > +// The returned data should be ordered by catalog_name, then db_schema_name. +type CommandGetDbSchemas struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // + // Specifies the Catalog to search for the tables. + // An empty string retrieves those without a catalog. + // If omitted the catalog name should not be used to narrow the search. + Catalog *string `protobuf:"bytes,1,opt,name=catalog,proto3,oneof" json:"catalog,omitempty"` + // + // Specifies a filter pattern for schemas to search for. + // When no db_schema_filter_pattern is provided, the pattern will not be used to narrow the search. + // In the pattern string, two special characters can be used to denote matching rules: + // - "%" means to match any substring with 0 or more characters. + // - "_" means to match any one character. + DbSchemaFilterPattern *string `protobuf:"bytes,2,opt,name=db_schema_filter_pattern,json=dbSchemaFilterPattern,proto3,oneof" json:"db_schema_filter_pattern,omitempty"` +} + +func (x *CommandGetDbSchemas) Reset() { + *x = CommandGetDbSchemas{} + if protoimpl.UnsafeEnabled { + mi := &file_FlightSql_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CommandGetDbSchemas) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CommandGetDbSchemas) ProtoMessage() {} + +func (x *CommandGetDbSchemas) ProtoReflect() protoreflect.Message { + mi := &file_FlightSql_proto_msgTypes[3] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CommandGetDbSchemas.ProtoReflect.Descriptor instead. 
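Both fields of CommandGetDbSchemas above are optional, so the generated struct stores them as pointers; proto.String from google.golang.org/protobuf/proto is the usual way to populate them. A hedged construction sketch with a placeholder import path and illustrative catalog/schema names:

package main

import (
	"fmt"

	"google.golang.org/protobuf/proto"

	pb "example.com/flightsql" // placeholder: the package generated from FlightSql.proto
)

func main() {
	// List schemas in catalog "main" whose names start with "sales".
	// In the filter pattern, "%" matches any substring and "_" matches one character.
	req := &pb.CommandGetDbSchemas{
		Catalog:               proto.String("main"),
		DbSchemaFilterPattern: proto.String("sales%"),
	}
	fmt.Println(req.GetCatalog(), req.GetDbSchemaFilterPattern())
}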
+func (*CommandGetDbSchemas) Descriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{3} +} + +func (x *CommandGetDbSchemas) GetCatalog() string { + if x != nil && x.Catalog != nil { + return *x.Catalog + } + return "" +} + +func (x *CommandGetDbSchemas) GetDbSchemaFilterPattern() string { + if x != nil && x.DbSchemaFilterPattern != nil { + return *x.DbSchemaFilterPattern + } + return "" +} + +// +// Represents a request to retrieve the list of tables, and optionally their schemas, on a Flight SQL enabled backend. +// Used in the command member of FlightDescriptor for the following RPC calls: +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. +// +// The returned Arrow schema will be: +// < +// catalog_name: utf8, +// db_schema_name: utf8, +// table_name: utf8 not null, +// table_type: utf8 not null, +// [optional] table_schema: bytes not null (schema of the table as described in Schema.fbs::Schema, +// it is serialized as an IPC message.) +// > +// Fields on table_schema may contain the following metadata: +// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name +// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name +// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name +// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. +// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size +// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable +// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case sensitive, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. +// The returned data should be ordered by catalog_name, db_schema_name, table_name, then table_type, followed by table_schema if requested. +type CommandGetTables struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // + // Specifies the Catalog to search for the tables. + // An empty string retrieves those without a catalog. + // If omitted the catalog name should not be used to narrow the search. + Catalog *string `protobuf:"bytes,1,opt,name=catalog,proto3,oneof" json:"catalog,omitempty"` + // + // Specifies a filter pattern for schemas to search for. + // When no db_schema_filter_pattern is provided, all schemas matching other filters are searched. + // In the pattern string, two special characters can be used to denote matching rules: + // - "%" means to match any substring with 0 or more characters. + // - "_" means to match any one character. + DbSchemaFilterPattern *string `protobuf:"bytes,2,opt,name=db_schema_filter_pattern,json=dbSchemaFilterPattern,proto3,oneof" json:"db_schema_filter_pattern,omitempty"` + // + // Specifies a filter pattern for tables to search for. + // When no table_name_filter_pattern is provided, all tables matching other filters are searched. + // In the pattern string, two special characters can be used to denote matching rules: + // - "%" means to match any substring with 0 or more characters. + // - "_" means to match any one character. 
+ TableNameFilterPattern *string `protobuf:"bytes,3,opt,name=table_name_filter_pattern,json=tableNameFilterPattern,proto3,oneof" json:"table_name_filter_pattern,omitempty"` + // + // Specifies a filter of table types which must match. + // The table types depend on vendor/implementation. It is usually used to separate tables from views or system tables. + // TABLE, VIEW, and SYSTEM TABLE are commonly supported. + TableTypes []string `protobuf:"bytes,4,rep,name=table_types,json=tableTypes,proto3" json:"table_types,omitempty"` + // Specifies if the Arrow schema should be returned for found tables. + IncludeSchema bool `protobuf:"varint,5,opt,name=include_schema,json=includeSchema,proto3" json:"include_schema,omitempty"` +} + +func (x *CommandGetTables) Reset() { + *x = CommandGetTables{} + if protoimpl.UnsafeEnabled { + mi := &file_FlightSql_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CommandGetTables) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CommandGetTables) ProtoMessage() {} + +func (x *CommandGetTables) ProtoReflect() protoreflect.Message { + mi := &file_FlightSql_proto_msgTypes[4] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CommandGetTables.ProtoReflect.Descriptor instead. +func (*CommandGetTables) Descriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{4} +} + +func (x *CommandGetTables) GetCatalog() string { + if x != nil && x.Catalog != nil { + return *x.Catalog + } + return "" +} + +func (x *CommandGetTables) GetDbSchemaFilterPattern() string { + if x != nil && x.DbSchemaFilterPattern != nil { + return *x.DbSchemaFilterPattern + } + return "" +} + +func (x *CommandGetTables) GetTableNameFilterPattern() string { + if x != nil && x.TableNameFilterPattern != nil { + return *x.TableNameFilterPattern + } + return "" +} + +func (x *CommandGetTables) GetTableTypes() []string { + if x != nil { + return x.TableTypes + } + return nil +} + +func (x *CommandGetTables) GetIncludeSchema() bool { + if x != nil { + return x.IncludeSchema + } + return false +} + +// +// Represents a request to retrieve the list of table types on a Flight SQL enabled backend. +// The table types depend on vendor/implementation. It is usually used to separate tables from views or system tables. +// TABLE, VIEW, and SYSTEM TABLE are commonly supported. +// Used in the command member of FlightDescriptor for the following RPC calls: +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. +// +// The returned Arrow schema will be: +// < +// table_type: utf8 not null +// > +// The returned data should be ordered by table_type. 
+type CommandGetTableTypes struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields +} + +func (x *CommandGetTableTypes) Reset() { + *x = CommandGetTableTypes{} + if protoimpl.UnsafeEnabled { + mi := &file_FlightSql_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CommandGetTableTypes) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CommandGetTableTypes) ProtoMessage() {} + +func (x *CommandGetTableTypes) ProtoReflect() protoreflect.Message { + mi := &file_FlightSql_proto_msgTypes[5] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CommandGetTableTypes.ProtoReflect.Descriptor instead. +func (*CommandGetTableTypes) Descriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{5} +} + +// +// Represents a request to retrieve the primary keys of a table on a Flight SQL enabled backend. +// Used in the command member of FlightDescriptor for the following RPC calls: +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. +// +// The returned Arrow schema will be: +// < +// catalog_name: utf8, +// db_schema_name: utf8, +// table_name: utf8 not null, +// column_name: utf8 not null, +// key_name: utf8, +// key_sequence: int not null +// > +// The returned data should be ordered by catalog_name, db_schema_name, table_name, key_name, then key_sequence. +type CommandGetPrimaryKeys struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // + // Specifies the catalog to search for the table. + // An empty string retrieves those without a catalog. + // If omitted the catalog name should not be used to narrow the search. + Catalog *string `protobuf:"bytes,1,opt,name=catalog,proto3,oneof" json:"catalog,omitempty"` + // + // Specifies the schema to search for the table. + // An empty string retrieves those without a schema. + // If omitted the schema name should not be used to narrow the search. + DbSchema *string `protobuf:"bytes,2,opt,name=db_schema,json=dbSchema,proto3,oneof" json:"db_schema,omitempty"` + // Specifies the table to get the primary keys for. + Table string `protobuf:"bytes,3,opt,name=table,proto3" json:"table,omitempty"` +} + +func (x *CommandGetPrimaryKeys) Reset() { + *x = CommandGetPrimaryKeys{} + if protoimpl.UnsafeEnabled { + mi := &file_FlightSql_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CommandGetPrimaryKeys) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CommandGetPrimaryKeys) ProtoMessage() {} + +func (x *CommandGetPrimaryKeys) ProtoReflect() protoreflect.Message { + mi := &file_FlightSql_proto_msgTypes[6] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CommandGetPrimaryKeys.ProtoReflect.Descriptor instead. 
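The CommandGetTables message defined earlier combines the same optional filters with a table-type list and an include_schema flag. A sketch of a request for all tables and views in one schema, with a placeholder import path and illustrative names:

package main

import (
	"fmt"

	"google.golang.org/protobuf/proto"

	pb "example.com/flightsql" // placeholder: the package generated from FlightSql.proto
)

func main() {
	req := &pb.CommandGetTables{
		Catalog:                proto.String("main"),
		DbSchemaFilterPattern:  proto.String("sales"),
		TableNameFilterPattern: proto.String("%"),         // any table name
		TableTypes:             []string{"TABLE", "VIEW"}, // skip system tables
		IncludeSchema:          true,                      // also return each table's Arrow schema
	}
	fmt.Println(req.GetTableNameFilterPattern(), req.GetTableTypes(), req.GetIncludeSchema())
}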
+func (*CommandGetPrimaryKeys) Descriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{6} +} + +func (x *CommandGetPrimaryKeys) GetCatalog() string { + if x != nil && x.Catalog != nil { + return *x.Catalog + } + return "" +} + +func (x *CommandGetPrimaryKeys) GetDbSchema() string { + if x != nil && x.DbSchema != nil { + return *x.DbSchema + } + return "" +} + +func (x *CommandGetPrimaryKeys) GetTable() string { + if x != nil { + return x.Table + } + return "" +} + +// +// Represents a request to retrieve a description of the foreign key columns that reference the given table's +// primary key columns (the foreign keys exported by a table) of a table on a Flight SQL enabled backend. +// Used in the command member of FlightDescriptor for the following RPC calls: +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. +// +// The returned Arrow schema will be: +// < +// pk_catalog_name: utf8, +// pk_db_schema_name: utf8, +// pk_table_name: utf8 not null, +// pk_column_name: utf8 not null, +// fk_catalog_name: utf8, +// fk_db_schema_name: utf8, +// fk_table_name: utf8 not null, +// fk_column_name: utf8 not null, +// key_sequence: int not null, +// fk_key_name: utf8, +// pk_key_name: utf8, +// update_rule: uint1 not null, +// delete_rule: uint1 not null +// > +// The returned data should be ordered by fk_catalog_name, fk_db_schema_name, fk_table_name, fk_key_name, then key_sequence. +// update_rule and delete_rule returns a byte that is equivalent to actions declared on UpdateDeleteRules enum. +type CommandGetExportedKeys struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // + // Specifies the catalog to search for the foreign key table. + // An empty string retrieves those without a catalog. + // If omitted the catalog name should not be used to narrow the search. + Catalog *string `protobuf:"bytes,1,opt,name=catalog,proto3,oneof" json:"catalog,omitempty"` + // + // Specifies the schema to search for the foreign key table. + // An empty string retrieves those without a schema. + // If omitted the schema name should not be used to narrow the search. + DbSchema *string `protobuf:"bytes,2,opt,name=db_schema,json=dbSchema,proto3,oneof" json:"db_schema,omitempty"` + // Specifies the foreign key table to get the foreign keys for. + Table string `protobuf:"bytes,3,opt,name=table,proto3" json:"table,omitempty"` +} + +func (x *CommandGetExportedKeys) Reset() { + *x = CommandGetExportedKeys{} + if protoimpl.UnsafeEnabled { + mi := &file_FlightSql_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CommandGetExportedKeys) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CommandGetExportedKeys) ProtoMessage() {} + +func (x *CommandGetExportedKeys) ProtoReflect() protoreflect.Message { + mi := &file_FlightSql_proto_msgTypes[7] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CommandGetExportedKeys.ProtoReflect.Descriptor instead. 
+func (*CommandGetExportedKeys) Descriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{7} +} + +func (x *CommandGetExportedKeys) GetCatalog() string { + if x != nil && x.Catalog != nil { + return *x.Catalog + } + return "" +} + +func (x *CommandGetExportedKeys) GetDbSchema() string { + if x != nil && x.DbSchema != nil { + return *x.DbSchema + } + return "" +} + +func (x *CommandGetExportedKeys) GetTable() string { + if x != nil { + return x.Table + } + return "" +} + +// +// Represents a request to retrieve the foreign keys of a table on a Flight SQL enabled backend. +// Used in the command member of FlightDescriptor for the following RPC calls: +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. +// +// The returned Arrow schema will be: +// < +// pk_catalog_name: utf8, +// pk_db_schema_name: utf8, +// pk_table_name: utf8 not null, +// pk_column_name: utf8 not null, +// fk_catalog_name: utf8, +// fk_db_schema_name: utf8, +// fk_table_name: utf8 not null, +// fk_column_name: utf8 not null, +// key_sequence: int not null, +// fk_key_name: utf8, +// pk_key_name: utf8, +// update_rule: uint1 not null, +// delete_rule: uint1 not null +// > +// The returned data should be ordered by pk_catalog_name, pk_db_schema_name, pk_table_name, pk_key_name, then key_sequence. +// update_rule and delete_rule returns a byte that is equivalent to actions: +// - 0 = CASCADE +// - 1 = RESTRICT +// - 2 = SET NULL +// - 3 = NO ACTION +// - 4 = SET DEFAULT +type CommandGetImportedKeys struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // + // Specifies the catalog to search for the primary key table. + // An empty string retrieves those without a catalog. + // If omitted the catalog name should not be used to narrow the search. + Catalog *string `protobuf:"bytes,1,opt,name=catalog,proto3,oneof" json:"catalog,omitempty"` + // + // Specifies the schema to search for the primary key table. + // An empty string retrieves those without a schema. + // If omitted the schema name should not be used to narrow the search. + DbSchema *string `protobuf:"bytes,2,opt,name=db_schema,json=dbSchema,proto3,oneof" json:"db_schema,omitempty"` + // Specifies the primary key table to get the foreign keys for. + Table string `protobuf:"bytes,3,opt,name=table,proto3" json:"table,omitempty"` +} + +func (x *CommandGetImportedKeys) Reset() { + *x = CommandGetImportedKeys{} + if protoimpl.UnsafeEnabled { + mi := &file_FlightSql_proto_msgTypes[8] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CommandGetImportedKeys) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CommandGetImportedKeys) ProtoMessage() {} + +func (x *CommandGetImportedKeys) ProtoReflect() protoreflect.Message { + mi := &file_FlightSql_proto_msgTypes[8] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CommandGetImportedKeys.ProtoReflect.Descriptor instead. 
+func (*CommandGetImportedKeys) Descriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{8} +} + +func (x *CommandGetImportedKeys) GetCatalog() string { + if x != nil && x.Catalog != nil { + return *x.Catalog + } + return "" +} + +func (x *CommandGetImportedKeys) GetDbSchema() string { + if x != nil && x.DbSchema != nil { + return *x.DbSchema + } + return "" +} + +func (x *CommandGetImportedKeys) GetTable() string { + if x != nil { + return x.Table + } + return "" +} + +// +// Represents a request to retrieve a description of the foreign key columns in the given foreign key table that +// reference the primary key or the columns representing a unique constraint of the parent table (could be the same +// or a different table) on a Flight SQL enabled backend. +// Used in the command member of FlightDescriptor for the following RPC calls: +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. +// +// The returned Arrow schema will be: +// < +// pk_catalog_name: utf8, +// pk_db_schema_name: utf8, +// pk_table_name: utf8 not null, +// pk_column_name: utf8 not null, +// fk_catalog_name: utf8, +// fk_db_schema_name: utf8, +// fk_table_name: utf8 not null, +// fk_column_name: utf8 not null, +// key_sequence: int not null, +// fk_key_name: utf8, +// pk_key_name: utf8, +// update_rule: uint1 not null, +// delete_rule: uint1 not null +// > +// The returned data should be ordered by pk_catalog_name, pk_db_schema_name, pk_table_name, pk_key_name, then key_sequence. +// update_rule and delete_rule returns a byte that is equivalent to actions: +// - 0 = CASCADE +// - 1 = RESTRICT +// - 2 = SET NULL +// - 3 = NO ACTION +// - 4 = SET DEFAULT +type CommandGetCrossReference struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + //* + // The catalog name where the parent table is. + // An empty string retrieves those without a catalog. + // If omitted the catalog name should not be used to narrow the search. + PkCatalog *string `protobuf:"bytes,1,opt,name=pk_catalog,json=pkCatalog,proto3,oneof" json:"pk_catalog,omitempty"` + //* + // The Schema name where the parent table is. + // An empty string retrieves those without a schema. + // If omitted the schema name should not be used to narrow the search. + PkDbSchema *string `protobuf:"bytes,2,opt,name=pk_db_schema,json=pkDbSchema,proto3,oneof" json:"pk_db_schema,omitempty"` + //* + // The parent table name. It cannot be null. + PkTable string `protobuf:"bytes,3,opt,name=pk_table,json=pkTable,proto3" json:"pk_table,omitempty"` + //* + // The catalog name where the foreign table is. + // An empty string retrieves those without a catalog. + // If omitted the catalog name should not be used to narrow the search. + FkCatalog *string `protobuf:"bytes,4,opt,name=fk_catalog,json=fkCatalog,proto3,oneof" json:"fk_catalog,omitempty"` + //* + // The schema name where the foreign table is. + // An empty string retrieves those without a schema. + // If omitted the schema name should not be used to narrow the search. + FkDbSchema *string `protobuf:"bytes,5,opt,name=fk_db_schema,json=fkDbSchema,proto3,oneof" json:"fk_db_schema,omitempty"` + //* + // The foreign table name. It cannot be null. 
+ FkTable string `protobuf:"bytes,6,opt,name=fk_table,json=fkTable,proto3" json:"fk_table,omitempty"` +} + +func (x *CommandGetCrossReference) Reset() { + *x = CommandGetCrossReference{} + if protoimpl.UnsafeEnabled { + mi := &file_FlightSql_proto_msgTypes[9] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CommandGetCrossReference) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CommandGetCrossReference) ProtoMessage() {} + +func (x *CommandGetCrossReference) ProtoReflect() protoreflect.Message { + mi := &file_FlightSql_proto_msgTypes[9] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CommandGetCrossReference.ProtoReflect.Descriptor instead. +func (*CommandGetCrossReference) Descriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{9} +} + +func (x *CommandGetCrossReference) GetPkCatalog() string { + if x != nil && x.PkCatalog != nil { + return *x.PkCatalog + } + return "" +} + +func (x *CommandGetCrossReference) GetPkDbSchema() string { + if x != nil && x.PkDbSchema != nil { + return *x.PkDbSchema + } + return "" +} + +func (x *CommandGetCrossReference) GetPkTable() string { + if x != nil { + return x.PkTable + } + return "" +} + +func (x *CommandGetCrossReference) GetFkCatalog() string { + if x != nil && x.FkCatalog != nil { + return *x.FkCatalog + } + return "" +} + +func (x *CommandGetCrossReference) GetFkDbSchema() string { + if x != nil && x.FkDbSchema != nil { + return *x.FkDbSchema + } + return "" +} + +func (x *CommandGetCrossReference) GetFkTable() string { + if x != nil { + return x.FkTable + } + return "" +} + +// +// Request message for the "CreatePreparedStatement" action on a Flight SQL enabled backend. +type ActionCreatePreparedStatementRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The valid SQL string to create a prepared statement for. + Query string `protobuf:"bytes,1,opt,name=query,proto3" json:"query,omitempty"` +} + +func (x *ActionCreatePreparedStatementRequest) Reset() { + *x = ActionCreatePreparedStatementRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_FlightSql_proto_msgTypes[10] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ActionCreatePreparedStatementRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ActionCreatePreparedStatementRequest) ProtoMessage() {} + +func (x *ActionCreatePreparedStatementRequest) ProtoReflect() protoreflect.Message { + mi := &file_FlightSql_proto_msgTypes[10] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ActionCreatePreparedStatementRequest.ProtoReflect.Descriptor instead. +func (*ActionCreatePreparedStatementRequest) Descriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{10} +} + +func (x *ActionCreatePreparedStatementRequest) GetQuery() string { + if x != nil { + return x.Query + } + return "" +} + +// +// Wrap the result of a "GetPreparedStatement" action. 
+// +// The resultant PreparedStatement can be closed either: +// - Manually, through the "ClosePreparedStatement" action; +// - Automatically, by a server timeout. +type ActionCreatePreparedStatementResult struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Opaque handle for the prepared statement on the server. + PreparedStatementHandle []byte `protobuf:"bytes,1,opt,name=prepared_statement_handle,json=preparedStatementHandle,proto3" json:"prepared_statement_handle,omitempty"` + // If a result set generating query was provided, dataset_schema contains the + // schema of the dataset as described in Schema.fbs::Schema, it is serialized as an IPC message. + DatasetSchema []byte `protobuf:"bytes,2,opt,name=dataset_schema,json=datasetSchema,proto3" json:"dataset_schema,omitempty"` + // If the query provided contained parameters, parameter_schema contains the + // schema of the expected parameters as described in Schema.fbs::Schema, it is serialized as an IPC message. + ParameterSchema []byte `protobuf:"bytes,3,opt,name=parameter_schema,json=parameterSchema,proto3" json:"parameter_schema,omitempty"` +} + +func (x *ActionCreatePreparedStatementResult) Reset() { + *x = ActionCreatePreparedStatementResult{} + if protoimpl.UnsafeEnabled { + mi := &file_FlightSql_proto_msgTypes[11] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ActionCreatePreparedStatementResult) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ActionCreatePreparedStatementResult) ProtoMessage() {} + +func (x *ActionCreatePreparedStatementResult) ProtoReflect() protoreflect.Message { + mi := &file_FlightSql_proto_msgTypes[11] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ActionCreatePreparedStatementResult.ProtoReflect.Descriptor instead. +func (*ActionCreatePreparedStatementResult) Descriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{11} +} + +func (x *ActionCreatePreparedStatementResult) GetPreparedStatementHandle() []byte { + if x != nil { + return x.PreparedStatementHandle + } + return nil +} + +func (x *ActionCreatePreparedStatementResult) GetDatasetSchema() []byte { + if x != nil { + return x.DatasetSchema + } + return nil +} + +func (x *ActionCreatePreparedStatementResult) GetParameterSchema() []byte { + if x != nil { + return x.ParameterSchema + } + return nil +} + +// +// Request message for the "ClosePreparedStatement" action on a Flight SQL enabled backend. +// Closes server resources associated with the prepared statement handle. +type ActionClosePreparedStatementRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Opaque handle for the prepared statement on the server. 
+ PreparedStatementHandle []byte `protobuf:"bytes,1,opt,name=prepared_statement_handle,json=preparedStatementHandle,proto3" json:"prepared_statement_handle,omitempty"` +} + +func (x *ActionClosePreparedStatementRequest) Reset() { + *x = ActionClosePreparedStatementRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_FlightSql_proto_msgTypes[12] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ActionClosePreparedStatementRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ActionClosePreparedStatementRequest) ProtoMessage() {} + +func (x *ActionClosePreparedStatementRequest) ProtoReflect() protoreflect.Message { + mi := &file_FlightSql_proto_msgTypes[12] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ActionClosePreparedStatementRequest.ProtoReflect.Descriptor instead. +func (*ActionClosePreparedStatementRequest) Descriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{12} +} + +func (x *ActionClosePreparedStatementRequest) GetPreparedStatementHandle() []byte { + if x != nil { + return x.PreparedStatementHandle + } + return nil +} + +// +// Represents a SQL query. Used in the command member of FlightDescriptor +// for the following RPC calls: +// - GetSchema: return the Arrow schema of the query. +// Fields on this schema may contain the following metadata: +// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name +// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name +// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name +// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. +// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size +// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable +// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case sensitive, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. +// - GetFlightInfo: execute the query. +type CommandStatementQuery struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The SQL syntax. + Query string `protobuf:"bytes,1,opt,name=query,proto3" json:"query,omitempty"` +} + +func (x *CommandStatementQuery) Reset() { + *x = CommandStatementQuery{} + if protoimpl.UnsafeEnabled { + mi := &file_FlightSql_proto_msgTypes[13] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CommandStatementQuery) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CommandStatementQuery) ProtoMessage() {} + +func (x *CommandStatementQuery) ProtoReflect() protoreflect.Message { + mi := &file_FlightSql_proto_msgTypes[13] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CommandStatementQuery.ProtoReflect.Descriptor instead. 
+func (*CommandStatementQuery) Descriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{13} +} + +func (x *CommandStatementQuery) GetQuery() string { + if x != nil { + return x.Query + } + return "" +} + +//* +// Represents a ticket resulting from GetFlightInfo with a CommandStatementQuery. +// This should be used only once and treated as an opaque value, that is, clients should not attempt to parse this. +type TicketStatementQuery struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Unique identifier for the instance of the statement to execute. + StatementHandle []byte `protobuf:"bytes,1,opt,name=statement_handle,json=statementHandle,proto3" json:"statement_handle,omitempty"` +} + +func (x *TicketStatementQuery) Reset() { + *x = TicketStatementQuery{} + if protoimpl.UnsafeEnabled { + mi := &file_FlightSql_proto_msgTypes[14] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *TicketStatementQuery) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*TicketStatementQuery) ProtoMessage() {} + +func (x *TicketStatementQuery) ProtoReflect() protoreflect.Message { + mi := &file_FlightSql_proto_msgTypes[14] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use TicketStatementQuery.ProtoReflect.Descriptor instead. +func (*TicketStatementQuery) Descriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{14} +} + +func (x *TicketStatementQuery) GetStatementHandle() []byte { + if x != nil { + return x.StatementHandle + } + return nil +} + +// +// Represents an instance of executing a prepared statement. Used in the command member of FlightDescriptor for +// the following RPC calls: +// - GetSchema: return the Arrow schema of the query. +// Fields on this schema may contain the following metadata: +// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name +// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name +// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name +// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. +// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size +// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable +// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case sensitive, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. +// - DoPut: bind parameter values. All of the bound parameter sets will be executed as a single atomic execution. +// - GetFlightInfo: execute the prepared statement instance. +type CommandPreparedStatementQuery struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Opaque handle for the prepared statement on the server. 
+ PreparedStatementHandle []byte `protobuf:"bytes,1,opt,name=prepared_statement_handle,json=preparedStatementHandle,proto3" json:"prepared_statement_handle,omitempty"` +} + +func (x *CommandPreparedStatementQuery) Reset() { + *x = CommandPreparedStatementQuery{} + if protoimpl.UnsafeEnabled { + mi := &file_FlightSql_proto_msgTypes[15] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CommandPreparedStatementQuery) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CommandPreparedStatementQuery) ProtoMessage() {} + +func (x *CommandPreparedStatementQuery) ProtoReflect() protoreflect.Message { + mi := &file_FlightSql_proto_msgTypes[15] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CommandPreparedStatementQuery.ProtoReflect.Descriptor instead. +func (*CommandPreparedStatementQuery) Descriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{15} +} + +func (x *CommandPreparedStatementQuery) GetPreparedStatementHandle() []byte { + if x != nil { + return x.PreparedStatementHandle + } + return nil +} + +// +// Represents a SQL update query. Used in the command member of FlightDescriptor +// for the the RPC call DoPut to cause the server to execute the included SQL update. +type CommandStatementUpdate struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The SQL syntax. + Query string `protobuf:"bytes,1,opt,name=query,proto3" json:"query,omitempty"` +} + +func (x *CommandStatementUpdate) Reset() { + *x = CommandStatementUpdate{} + if protoimpl.UnsafeEnabled { + mi := &file_FlightSql_proto_msgTypes[16] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CommandStatementUpdate) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CommandStatementUpdate) ProtoMessage() {} + +func (x *CommandStatementUpdate) ProtoReflect() protoreflect.Message { + mi := &file_FlightSql_proto_msgTypes[16] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CommandStatementUpdate.ProtoReflect.Descriptor instead. +func (*CommandStatementUpdate) Descriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{16} +} + +func (x *CommandStatementUpdate) GetQuery() string { + if x != nil { + return x.Query + } + return "" +} + +// +// Represents a SQL update query. Used in the command member of FlightDescriptor +// for the the RPC call DoPut to cause the server to execute the included +// prepared statement handle as an update. +type CommandPreparedStatementUpdate struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Opaque handle for the prepared statement on the server. 
+ PreparedStatementHandle []byte `protobuf:"bytes,1,opt,name=prepared_statement_handle,json=preparedStatementHandle,proto3" json:"prepared_statement_handle,omitempty"` +} + +func (x *CommandPreparedStatementUpdate) Reset() { + *x = CommandPreparedStatementUpdate{} + if protoimpl.UnsafeEnabled { + mi := &file_FlightSql_proto_msgTypes[17] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CommandPreparedStatementUpdate) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CommandPreparedStatementUpdate) ProtoMessage() {} + +func (x *CommandPreparedStatementUpdate) ProtoReflect() protoreflect.Message { + mi := &file_FlightSql_proto_msgTypes[17] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CommandPreparedStatementUpdate.ProtoReflect.Descriptor instead. +func (*CommandPreparedStatementUpdate) Descriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{17} +} + +func (x *CommandPreparedStatementUpdate) GetPreparedStatementHandle() []byte { + if x != nil { + return x.PreparedStatementHandle + } + return nil +} + +// +// Returned from the RPC call DoPut when a CommandStatementUpdate +// CommandPreparedStatementUpdate was in the request, containing +// results from the update. +type DoPutUpdateResult struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The number of records updated. A return value of -1 represents + // an unknown updated record count. + RecordCount int64 `protobuf:"varint,1,opt,name=record_count,json=recordCount,proto3" json:"record_count,omitempty"` +} + +func (x *DoPutUpdateResult) Reset() { + *x = DoPutUpdateResult{} + if protoimpl.UnsafeEnabled { + mi := &file_FlightSql_proto_msgTypes[18] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *DoPutUpdateResult) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DoPutUpdateResult) ProtoMessage() {} + +func (x *DoPutUpdateResult) ProtoReflect() protoreflect.Message { + mi := &file_FlightSql_proto_msgTypes[18] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DoPutUpdateResult.ProtoReflect.Descriptor instead. +func (*DoPutUpdateResult) Descriptor() ([]byte, []int) { + return file_FlightSql_proto_rawDescGZIP(), []int{18} +} + +func (x *DoPutUpdateResult) GetRecordCount() int64 { + if x != nil { + return x.RecordCount + } + return 0 +} + +var file_FlightSql_proto_extTypes = []protoimpl.ExtensionInfo{ + { + ExtendedType: (*descriptor.MessageOptions)(nil), + ExtensionType: (*bool)(nil), + Field: 1000, + Name: "arrow.flight.protocol.sql.experimental", + Tag: "varint,1000,opt,name=experimental", + Filename: "FlightSql.proto", + }, +} + +// Extension fields to descriptor.MessageOptions. 
+var ( + // optional bool experimental = 1000; + E_Experimental = &file_FlightSql_proto_extTypes[0] +) + +var File_FlightSql_proto protoreflect.FileDescriptor + +var file_FlightSql_proto_rawDesc = []byte{ + 0x0a, 0x0f, 0x46, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x53, 0x71, 0x6c, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x12, 0x19, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x63, 0x6f, 0x6c, 0x2e, 0x73, 0x71, 0x6c, 0x1a, 0x20, 0x67, 0x6f, + 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x64, 0x65, + 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x2c, + 0x0a, 0x11, 0x43, 0x6f, 0x6d, 0x6d, 0x61, 0x6e, 0x64, 0x47, 0x65, 0x74, 0x53, 0x71, 0x6c, 0x49, + 0x6e, 0x66, 0x6f, 0x12, 0x12, 0x0a, 0x04, 0x69, 0x6e, 0x66, 0x6f, 0x18, 0x01, 0x20, 0x03, 0x28, + 0x0d, 0x52, 0x04, 0x69, 0x6e, 0x66, 0x6f, 0x3a, 0x03, 0xc0, 0x3e, 0x01, 0x22, 0x4d, 0x0a, 0x16, + 0x43, 0x6f, 0x6d, 0x6d, 0x61, 0x6e, 0x64, 0x47, 0x65, 0x74, 0x58, 0x64, 0x62, 0x63, 0x54, 0x79, + 0x70, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x20, 0x0a, 0x09, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x74, + 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x48, 0x00, 0x52, 0x08, 0x64, 0x61, 0x74, + 0x61, 0x54, 0x79, 0x70, 0x65, 0x88, 0x01, 0x01, 0x3a, 0x03, 0xc0, 0x3e, 0x01, 0x42, 0x0c, 0x0a, + 0x0a, 0x5f, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x22, 0x19, 0x0a, 0x12, 0x43, + 0x6f, 0x6d, 0x6d, 0x61, 0x6e, 0x64, 0x47, 0x65, 0x74, 0x43, 0x61, 0x74, 0x61, 0x6c, 0x6f, 0x67, + 0x73, 0x3a, 0x03, 0xc0, 0x3e, 0x01, 0x22, 0xa0, 0x01, 0x0a, 0x13, 0x43, 0x6f, 0x6d, 0x6d, 0x61, + 0x6e, 0x64, 0x47, 0x65, 0x74, 0x44, 0x62, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x73, 0x12, 0x1d, + 0x0a, 0x07, 0x63, 0x61, 0x74, 0x61, 0x6c, 0x6f, 0x67, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x48, + 0x00, 0x52, 0x07, 0x63, 0x61, 0x74, 0x61, 0x6c, 0x6f, 0x67, 0x88, 0x01, 0x01, 0x12, 0x3c, 0x0a, + 0x18, 0x64, 0x62, 0x5f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x66, 0x69, 0x6c, 0x74, 0x65, + 0x72, 0x5f, 0x70, 0x61, 0x74, 0x74, 0x65, 0x72, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, + 0x01, 0x52, 0x15, 0x64, 0x62, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x46, 0x69, 0x6c, 0x74, 0x65, + 0x72, 0x50, 0x61, 0x74, 0x74, 0x65, 0x72, 0x6e, 0x88, 0x01, 0x01, 0x3a, 0x03, 0xc0, 0x3e, 0x01, + 0x42, 0x0a, 0x0a, 0x08, 0x5f, 0x63, 0x61, 0x74, 0x61, 0x6c, 0x6f, 0x67, 0x42, 0x1b, 0x0a, 0x19, + 0x5f, 0x64, 0x62, 0x5f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x66, 0x69, 0x6c, 0x74, 0x65, + 0x72, 0x5f, 0x70, 0x61, 0x74, 0x74, 0x65, 0x72, 0x6e, 0x22, 0xc3, 0x02, 0x0a, 0x10, 0x43, 0x6f, + 0x6d, 0x6d, 0x61, 0x6e, 0x64, 0x47, 0x65, 0x74, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x73, 0x12, 0x1d, + 0x0a, 0x07, 0x63, 0x61, 0x74, 0x61, 0x6c, 0x6f, 0x67, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x48, + 0x00, 0x52, 0x07, 0x63, 0x61, 0x74, 0x61, 0x6c, 0x6f, 0x67, 0x88, 0x01, 0x01, 0x12, 0x3c, 0x0a, + 0x18, 0x64, 0x62, 0x5f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x66, 0x69, 0x6c, 0x74, 0x65, + 0x72, 0x5f, 0x70, 0x61, 0x74, 0x74, 0x65, 0x72, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, + 0x01, 0x52, 0x15, 0x64, 0x62, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x46, 0x69, 0x6c, 0x74, 0x65, + 0x72, 0x50, 0x61, 0x74, 0x74, 0x65, 0x72, 0x6e, 0x88, 0x01, 0x01, 0x12, 0x3e, 0x0a, 0x19, 0x74, + 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x5f, 0x66, 0x69, 0x6c, 0x74, 0x65, 0x72, + 0x5f, 0x70, 0x61, 0x74, 0x74, 0x65, 0x72, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x48, 0x02, + 0x52, 0x16, 
0x74, 0x61, 0x62, 0x6c, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x46, 0x69, 0x6c, 0x74, 0x65, + 0x72, 0x50, 0x61, 0x74, 0x74, 0x65, 0x72, 0x6e, 0x88, 0x01, 0x01, 0x12, 0x1f, 0x0a, 0x0b, 0x74, + 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x09, + 0x52, 0x0a, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x54, 0x79, 0x70, 0x65, 0x73, 0x12, 0x25, 0x0a, 0x0e, + 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x5f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x18, 0x05, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x53, 0x63, 0x68, + 0x65, 0x6d, 0x61, 0x3a, 0x03, 0xc0, 0x3e, 0x01, 0x42, 0x0a, 0x0a, 0x08, 0x5f, 0x63, 0x61, 0x74, + 0x61, 0x6c, 0x6f, 0x67, 0x42, 0x1b, 0x0a, 0x19, 0x5f, 0x64, 0x62, 0x5f, 0x73, 0x63, 0x68, 0x65, + 0x6d, 0x61, 0x5f, 0x66, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x5f, 0x70, 0x61, 0x74, 0x74, 0x65, 0x72, + 0x6e, 0x42, 0x1c, 0x0a, 0x1a, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, + 0x5f, 0x66, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x5f, 0x70, 0x61, 0x74, 0x74, 0x65, 0x72, 0x6e, 0x22, + 0x1b, 0x0a, 0x14, 0x43, 0x6f, 0x6d, 0x6d, 0x61, 0x6e, 0x64, 0x47, 0x65, 0x74, 0x54, 0x61, 0x62, + 0x6c, 0x65, 0x54, 0x79, 0x70, 0x65, 0x73, 0x3a, 0x03, 0xc0, 0x3e, 0x01, 0x22, 0x8d, 0x01, 0x0a, + 0x15, 0x43, 0x6f, 0x6d, 0x6d, 0x61, 0x6e, 0x64, 0x47, 0x65, 0x74, 0x50, 0x72, 0x69, 0x6d, 0x61, + 0x72, 0x79, 0x4b, 0x65, 0x79, 0x73, 0x12, 0x1d, 0x0a, 0x07, 0x63, 0x61, 0x74, 0x61, 0x6c, 0x6f, + 0x67, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x07, 0x63, 0x61, 0x74, 0x61, 0x6c, + 0x6f, 0x67, 0x88, 0x01, 0x01, 0x12, 0x20, 0x0a, 0x09, 0x64, 0x62, 0x5f, 0x73, 0x63, 0x68, 0x65, + 0x6d, 0x61, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, 0x01, 0x52, 0x08, 0x64, 0x62, 0x53, 0x63, + 0x68, 0x65, 0x6d, 0x61, 0x88, 0x01, 0x01, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x61, 0x62, 0x6c, 0x65, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x3a, 0x03, 0xc0, + 0x3e, 0x01, 0x42, 0x0a, 0x0a, 0x08, 0x5f, 0x63, 0x61, 0x74, 0x61, 0x6c, 0x6f, 0x67, 0x42, 0x0c, + 0x0a, 0x0a, 0x5f, 0x64, 0x62, 0x5f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x22, 0x8e, 0x01, 0x0a, + 0x16, 0x43, 0x6f, 0x6d, 0x6d, 0x61, 0x6e, 0x64, 0x47, 0x65, 0x74, 0x45, 0x78, 0x70, 0x6f, 0x72, + 0x74, 0x65, 0x64, 0x4b, 0x65, 0x79, 0x73, 0x12, 0x1d, 0x0a, 0x07, 0x63, 0x61, 0x74, 0x61, 0x6c, + 0x6f, 0x67, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x07, 0x63, 0x61, 0x74, 0x61, + 0x6c, 0x6f, 0x67, 0x88, 0x01, 0x01, 0x12, 0x20, 0x0a, 0x09, 0x64, 0x62, 0x5f, 0x73, 0x63, 0x68, + 0x65, 0x6d, 0x61, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, 0x01, 0x52, 0x08, 0x64, 0x62, 0x53, + 0x63, 0x68, 0x65, 0x6d, 0x61, 0x88, 0x01, 0x01, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x61, 0x62, 0x6c, + 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x3a, 0x03, + 0xc0, 0x3e, 0x01, 0x42, 0x0a, 0x0a, 0x08, 0x5f, 0x63, 0x61, 0x74, 0x61, 0x6c, 0x6f, 0x67, 0x42, + 0x0c, 0x0a, 0x0a, 0x5f, 0x64, 0x62, 0x5f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x22, 0x8e, 0x01, + 0x0a, 0x16, 0x43, 0x6f, 0x6d, 0x6d, 0x61, 0x6e, 0x64, 0x47, 0x65, 0x74, 0x49, 0x6d, 0x70, 0x6f, + 0x72, 0x74, 0x65, 0x64, 0x4b, 0x65, 0x79, 0x73, 0x12, 0x1d, 0x0a, 0x07, 0x63, 0x61, 0x74, 0x61, + 0x6c, 0x6f, 0x67, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x07, 0x63, 0x61, 0x74, + 0x61, 0x6c, 0x6f, 0x67, 0x88, 0x01, 0x01, 0x12, 0x20, 0x0a, 0x09, 0x64, 0x62, 0x5f, 0x73, 0x63, + 0x68, 0x65, 0x6d, 0x61, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, 0x01, 0x52, 0x08, 0x64, 0x62, + 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 
0x88, 0x01, 0x01, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x61, 0x62, + 0x6c, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x3a, + 0x03, 0xc0, 0x3e, 0x01, 0x42, 0x0a, 0x0a, 0x08, 0x5f, 0x63, 0x61, 0x74, 0x61, 0x6c, 0x6f, 0x67, + 0x42, 0x0c, 0x0a, 0x0a, 0x5f, 0x64, 0x62, 0x5f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x22, 0xab, + 0x02, 0x0a, 0x18, 0x43, 0x6f, 0x6d, 0x6d, 0x61, 0x6e, 0x64, 0x47, 0x65, 0x74, 0x43, 0x72, 0x6f, + 0x73, 0x73, 0x52, 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x12, 0x22, 0x0a, 0x0a, 0x70, + 0x6b, 0x5f, 0x63, 0x61, 0x74, 0x61, 0x6c, 0x6f, 0x67, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x48, + 0x00, 0x52, 0x09, 0x70, 0x6b, 0x43, 0x61, 0x74, 0x61, 0x6c, 0x6f, 0x67, 0x88, 0x01, 0x01, 0x12, + 0x25, 0x0a, 0x0c, 0x70, 0x6b, 0x5f, 0x64, 0x62, 0x5f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, 0x01, 0x52, 0x0a, 0x70, 0x6b, 0x44, 0x62, 0x53, 0x63, 0x68, + 0x65, 0x6d, 0x61, 0x88, 0x01, 0x01, 0x12, 0x19, 0x0a, 0x08, 0x70, 0x6b, 0x5f, 0x74, 0x61, 0x62, + 0x6c, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x70, 0x6b, 0x54, 0x61, 0x62, 0x6c, + 0x65, 0x12, 0x22, 0x0a, 0x0a, 0x66, 0x6b, 0x5f, 0x63, 0x61, 0x74, 0x61, 0x6c, 0x6f, 0x67, 0x18, + 0x04, 0x20, 0x01, 0x28, 0x09, 0x48, 0x02, 0x52, 0x09, 0x66, 0x6b, 0x43, 0x61, 0x74, 0x61, 0x6c, + 0x6f, 0x67, 0x88, 0x01, 0x01, 0x12, 0x25, 0x0a, 0x0c, 0x66, 0x6b, 0x5f, 0x64, 0x62, 0x5f, 0x73, + 0x63, 0x68, 0x65, 0x6d, 0x61, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x48, 0x03, 0x52, 0x0a, 0x66, + 0x6b, 0x44, 0x62, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x88, 0x01, 0x01, 0x12, 0x19, 0x0a, 0x08, + 0x66, 0x6b, 0x5f, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, + 0x66, 0x6b, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x3a, 0x03, 0xc0, 0x3e, 0x01, 0x42, 0x0d, 0x0a, 0x0b, + 0x5f, 0x70, 0x6b, 0x5f, 0x63, 0x61, 0x74, 0x61, 0x6c, 0x6f, 0x67, 0x42, 0x0f, 0x0a, 0x0d, 0x5f, + 0x70, 0x6b, 0x5f, 0x64, 0x62, 0x5f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x42, 0x0d, 0x0a, 0x0b, + 0x5f, 0x66, 0x6b, 0x5f, 0x63, 0x61, 0x74, 0x61, 0x6c, 0x6f, 0x67, 0x42, 0x0f, 0x0a, 0x0d, 0x5f, + 0x66, 0x6b, 0x5f, 0x64, 0x62, 0x5f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x22, 0x41, 0x0a, 0x24, + 0x41, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x50, 0x72, 0x65, 0x70, + 0x61, 0x72, 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x71, 0x75, 0x65, 0x72, 0x79, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x05, 0x71, 0x75, 0x65, 0x72, 0x79, 0x3a, 0x03, 0xc0, 0x3e, 0x01, 0x22, + 0xb8, 0x01, 0x0a, 0x23, 0x41, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, + 0x50, 0x72, 0x65, 0x70, 0x61, 0x72, 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, 0x65, 0x6d, 0x65, 0x6e, + 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x3a, 0x0a, 0x19, 0x70, 0x72, 0x65, 0x70, 0x61, + 0x72, 0x65, 0x64, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x5f, 0x68, 0x61, + 0x6e, 0x64, 0x6c, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x17, 0x70, 0x72, 0x65, 0x70, + 0x61, 0x72, 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x48, 0x61, 0x6e, + 0x64, 0x6c, 0x65, 0x12, 0x25, 0x0a, 0x0e, 0x64, 0x61, 0x74, 0x61, 0x73, 0x65, 0x74, 0x5f, 0x73, + 0x63, 0x68, 0x65, 0x6d, 0x61, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0d, 0x64, 0x61, 0x74, + 0x61, 0x73, 0x65, 0x74, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x12, 0x29, 0x0a, 0x10, 0x70, 0x61, + 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x5f, 0x73, 0x63, 
0x68, 0x65, 0x6d, 0x61, 0x18, 0x03, + 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0f, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x53, + 0x63, 0x68, 0x65, 0x6d, 0x61, 0x3a, 0x03, 0xc0, 0x3e, 0x01, 0x22, 0x66, 0x0a, 0x23, 0x41, 0x63, + 0x74, 0x69, 0x6f, 0x6e, 0x43, 0x6c, 0x6f, 0x73, 0x65, 0x50, 0x72, 0x65, 0x70, 0x61, 0x72, 0x65, + 0x64, 0x53, 0x74, 0x61, 0x74, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, + 0x74, 0x12, 0x3a, 0x0a, 0x19, 0x70, 0x72, 0x65, 0x70, 0x61, 0x72, 0x65, 0x64, 0x5f, 0x73, 0x74, + 0x61, 0x74, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x5f, 0x68, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0c, 0x52, 0x17, 0x70, 0x72, 0x65, 0x70, 0x61, 0x72, 0x65, 0x64, 0x53, 0x74, + 0x61, 0x74, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x3a, 0x03, 0xc0, + 0x3e, 0x01, 0x22, 0x32, 0x0a, 0x15, 0x43, 0x6f, 0x6d, 0x6d, 0x61, 0x6e, 0x64, 0x53, 0x74, 0x61, + 0x74, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x51, 0x75, 0x65, 0x72, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x71, + 0x75, 0x65, 0x72, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x71, 0x75, 0x65, 0x72, + 0x79, 0x3a, 0x03, 0xc0, 0x3e, 0x01, 0x22, 0x46, 0x0a, 0x14, 0x54, 0x69, 0x63, 0x6b, 0x65, 0x74, + 0x53, 0x74, 0x61, 0x74, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x51, 0x75, 0x65, 0x72, 0x79, 0x12, 0x29, + 0x0a, 0x10, 0x73, 0x74, 0x61, 0x74, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x5f, 0x68, 0x61, 0x6e, 0x64, + 0x6c, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x6d, + 0x65, 0x6e, 0x74, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x3a, 0x03, 0xc0, 0x3e, 0x01, 0x22, 0x60, + 0x0a, 0x1d, 0x43, 0x6f, 0x6d, 0x6d, 0x61, 0x6e, 0x64, 0x50, 0x72, 0x65, 0x70, 0x61, 0x72, 0x65, + 0x64, 0x53, 0x74, 0x61, 0x74, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x51, 0x75, 0x65, 0x72, 0x79, 0x12, + 0x3a, 0x0a, 0x19, 0x70, 0x72, 0x65, 0x70, 0x61, 0x72, 0x65, 0x64, 0x5f, 0x73, 0x74, 0x61, 0x74, + 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x5f, 0x68, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x0c, 0x52, 0x17, 0x70, 0x72, 0x65, 0x70, 0x61, 0x72, 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, + 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x48, 0x61, 0x6e, 0x64, 0x6c, 0x65, 0x3a, 0x03, 0xc0, 0x3e, 0x01, + 0x22, 0x33, 0x0a, 0x16, 0x43, 0x6f, 0x6d, 0x6d, 0x61, 0x6e, 0x64, 0x53, 0x74, 0x61, 0x74, 0x65, + 0x6d, 0x65, 0x6e, 0x74, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x71, 0x75, + 0x65, 0x72, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x71, 0x75, 0x65, 0x72, 0x79, + 0x3a, 0x03, 0xc0, 0x3e, 0x01, 0x22, 0x61, 0x0a, 0x1e, 0x43, 0x6f, 0x6d, 0x6d, 0x61, 0x6e, 0x64, + 0x50, 0x72, 0x65, 0x70, 0x61, 0x72, 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, 0x65, 0x6d, 0x65, 0x6e, + 0x74, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x12, 0x3a, 0x0a, 0x19, 0x70, 0x72, 0x65, 0x70, 0x61, + 0x72, 0x65, 0x64, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x5f, 0x68, 0x61, + 0x6e, 0x64, 0x6c, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x17, 0x70, 0x72, 0x65, 0x70, + 0x61, 0x72, 0x65, 0x64, 0x53, 0x74, 0x61, 0x74, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x48, 0x61, 0x6e, + 0x64, 0x6c, 0x65, 0x3a, 0x03, 0xc0, 0x3e, 0x01, 0x22, 0x3b, 0x0a, 0x11, 0x44, 0x6f, 0x50, 0x75, + 0x74, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x21, 0x0a, + 0x0c, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x03, 0x52, 0x0b, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x43, 0x6f, 0x75, 0x6e, 0x74, + 0x3a, 0x03, 0xc0, 0x3e, 0x01, 0x2a, 0x8c, 0x16, 0x0a, 0x07, 0x53, 0x71, 0x6c, 0x49, 
0x6e, 0x66, + 0x6f, 0x12, 0x1a, 0x0a, 0x16, 0x46, 0x4c, 0x49, 0x47, 0x48, 0x54, 0x5f, 0x53, 0x51, 0x4c, 0x5f, + 0x53, 0x45, 0x52, 0x56, 0x45, 0x52, 0x5f, 0x4e, 0x41, 0x4d, 0x45, 0x10, 0x00, 0x12, 0x1d, 0x0a, + 0x19, 0x46, 0x4c, 0x49, 0x47, 0x48, 0x54, 0x5f, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x45, 0x52, 0x56, + 0x45, 0x52, 0x5f, 0x56, 0x45, 0x52, 0x53, 0x49, 0x4f, 0x4e, 0x10, 0x01, 0x12, 0x23, 0x0a, 0x1f, + 0x46, 0x4c, 0x49, 0x47, 0x48, 0x54, 0x5f, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x45, 0x52, 0x56, 0x45, + 0x52, 0x5f, 0x41, 0x52, 0x52, 0x4f, 0x57, 0x5f, 0x56, 0x45, 0x52, 0x53, 0x49, 0x4f, 0x4e, 0x10, + 0x02, 0x12, 0x1f, 0x0a, 0x1b, 0x46, 0x4c, 0x49, 0x47, 0x48, 0x54, 0x5f, 0x53, 0x51, 0x4c, 0x5f, + 0x53, 0x45, 0x52, 0x56, 0x45, 0x52, 0x5f, 0x52, 0x45, 0x41, 0x44, 0x5f, 0x4f, 0x4e, 0x4c, 0x59, + 0x10, 0x03, 0x12, 0x14, 0x0a, 0x0f, 0x53, 0x51, 0x4c, 0x5f, 0x44, 0x44, 0x4c, 0x5f, 0x43, 0x41, + 0x54, 0x41, 0x4c, 0x4f, 0x47, 0x10, 0xf4, 0x03, 0x12, 0x13, 0x0a, 0x0e, 0x53, 0x51, 0x4c, 0x5f, + 0x44, 0x44, 0x4c, 0x5f, 0x53, 0x43, 0x48, 0x45, 0x4d, 0x41, 0x10, 0xf5, 0x03, 0x12, 0x12, 0x0a, + 0x0d, 0x53, 0x51, 0x4c, 0x5f, 0x44, 0x44, 0x4c, 0x5f, 0x54, 0x41, 0x42, 0x4c, 0x45, 0x10, 0xf6, + 0x03, 0x12, 0x18, 0x0a, 0x13, 0x53, 0x51, 0x4c, 0x5f, 0x49, 0x44, 0x45, 0x4e, 0x54, 0x49, 0x46, + 0x49, 0x45, 0x52, 0x5f, 0x43, 0x41, 0x53, 0x45, 0x10, 0xf7, 0x03, 0x12, 0x1e, 0x0a, 0x19, 0x53, + 0x51, 0x4c, 0x5f, 0x49, 0x44, 0x45, 0x4e, 0x54, 0x49, 0x46, 0x49, 0x45, 0x52, 0x5f, 0x51, 0x55, + 0x4f, 0x54, 0x45, 0x5f, 0x43, 0x48, 0x41, 0x52, 0x10, 0xf8, 0x03, 0x12, 0x1f, 0x0a, 0x1a, 0x53, + 0x51, 0x4c, 0x5f, 0x51, 0x55, 0x4f, 0x54, 0x45, 0x44, 0x5f, 0x49, 0x44, 0x45, 0x4e, 0x54, 0x49, + 0x46, 0x49, 0x45, 0x52, 0x5f, 0x43, 0x41, 0x53, 0x45, 0x10, 0xf9, 0x03, 0x12, 0x22, 0x0a, 0x1d, + 0x53, 0x51, 0x4c, 0x5f, 0x41, 0x4c, 0x4c, 0x5f, 0x54, 0x41, 0x42, 0x4c, 0x45, 0x53, 0x5f, 0x41, + 0x52, 0x45, 0x5f, 0x53, 0x45, 0x4c, 0x45, 0x43, 0x54, 0x41, 0x42, 0x4c, 0x45, 0x10, 0xfa, 0x03, + 0x12, 0x16, 0x0a, 0x11, 0x53, 0x51, 0x4c, 0x5f, 0x4e, 0x55, 0x4c, 0x4c, 0x5f, 0x4f, 0x52, 0x44, + 0x45, 0x52, 0x49, 0x4e, 0x47, 0x10, 0xfb, 0x03, 0x12, 0x11, 0x0a, 0x0c, 0x53, 0x51, 0x4c, 0x5f, + 0x4b, 0x45, 0x59, 0x57, 0x4f, 0x52, 0x44, 0x53, 0x10, 0xfc, 0x03, 0x12, 0x1a, 0x0a, 0x15, 0x53, + 0x51, 0x4c, 0x5f, 0x4e, 0x55, 0x4d, 0x45, 0x52, 0x49, 0x43, 0x5f, 0x46, 0x55, 0x4e, 0x43, 0x54, + 0x49, 0x4f, 0x4e, 0x53, 0x10, 0xfd, 0x03, 0x12, 0x19, 0x0a, 0x14, 0x53, 0x51, 0x4c, 0x5f, 0x53, + 0x54, 0x52, 0x49, 0x4e, 0x47, 0x5f, 0x46, 0x55, 0x4e, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x53, 0x10, + 0xfe, 0x03, 0x12, 0x19, 0x0a, 0x14, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x59, 0x53, 0x54, 0x45, 0x4d, + 0x5f, 0x46, 0x55, 0x4e, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x53, 0x10, 0xff, 0x03, 0x12, 0x1b, 0x0a, + 0x16, 0x53, 0x51, 0x4c, 0x5f, 0x44, 0x41, 0x54, 0x45, 0x54, 0x49, 0x4d, 0x45, 0x5f, 0x46, 0x55, + 0x4e, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x53, 0x10, 0x80, 0x04, 0x12, 0x1d, 0x0a, 0x18, 0x53, 0x51, + 0x4c, 0x5f, 0x53, 0x45, 0x41, 0x52, 0x43, 0x48, 0x5f, 0x53, 0x54, 0x52, 0x49, 0x4e, 0x47, 0x5f, + 0x45, 0x53, 0x43, 0x41, 0x50, 0x45, 0x10, 0x81, 0x04, 0x12, 0x1e, 0x0a, 0x19, 0x53, 0x51, 0x4c, + 0x5f, 0x45, 0x58, 0x54, 0x52, 0x41, 0x5f, 0x4e, 0x41, 0x4d, 0x45, 0x5f, 0x43, 0x48, 0x41, 0x52, + 0x41, 0x43, 0x54, 0x45, 0x52, 0x53, 0x10, 0x82, 0x04, 0x12, 0x21, 0x0a, 0x1c, 0x53, 0x51, 0x4c, + 0x5f, 0x53, 0x55, 0x50, 0x50, 0x4f, 0x52, 0x54, 0x53, 0x5f, 0x43, 0x4f, 0x4c, 0x55, 0x4d, 0x4e, + 0x5f, 0x41, 0x4c, 0x49, 0x41, 0x53, 0x49, 0x4e, 0x47, 0x10, 0x83, 0x04, 0x12, 0x1f, 0x0a, 0x1a, + 0x53, 0x51, 
0x4c, 0x5f, 0x4e, 0x55, 0x4c, 0x4c, 0x5f, 0x50, 0x4c, 0x55, 0x53, 0x5f, 0x4e, 0x55, + 0x4c, 0x4c, 0x5f, 0x49, 0x53, 0x5f, 0x4e, 0x55, 0x4c, 0x4c, 0x10, 0x84, 0x04, 0x12, 0x19, 0x0a, + 0x14, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x55, 0x50, 0x50, 0x4f, 0x52, 0x54, 0x53, 0x5f, 0x43, 0x4f, + 0x4e, 0x56, 0x45, 0x52, 0x54, 0x10, 0x85, 0x04, 0x12, 0x29, 0x0a, 0x24, 0x53, 0x51, 0x4c, 0x5f, + 0x53, 0x55, 0x50, 0x50, 0x4f, 0x52, 0x54, 0x53, 0x5f, 0x54, 0x41, 0x42, 0x4c, 0x45, 0x5f, 0x43, + 0x4f, 0x52, 0x52, 0x45, 0x4c, 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x4e, 0x41, 0x4d, 0x45, 0x53, + 0x10, 0x86, 0x04, 0x12, 0x33, 0x0a, 0x2e, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x55, 0x50, 0x50, 0x4f, + 0x52, 0x54, 0x53, 0x5f, 0x44, 0x49, 0x46, 0x46, 0x45, 0x52, 0x45, 0x4e, 0x54, 0x5f, 0x54, 0x41, + 0x42, 0x4c, 0x45, 0x5f, 0x43, 0x4f, 0x52, 0x52, 0x45, 0x4c, 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x5f, + 0x4e, 0x41, 0x4d, 0x45, 0x53, 0x10, 0x87, 0x04, 0x12, 0x29, 0x0a, 0x24, 0x53, 0x51, 0x4c, 0x5f, + 0x53, 0x55, 0x50, 0x50, 0x4f, 0x52, 0x54, 0x53, 0x5f, 0x45, 0x58, 0x50, 0x52, 0x45, 0x53, 0x53, + 0x49, 0x4f, 0x4e, 0x53, 0x5f, 0x49, 0x4e, 0x5f, 0x4f, 0x52, 0x44, 0x45, 0x52, 0x5f, 0x42, 0x59, + 0x10, 0x88, 0x04, 0x12, 0x24, 0x0a, 0x1f, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x55, 0x50, 0x50, 0x4f, + 0x52, 0x54, 0x53, 0x5f, 0x4f, 0x52, 0x44, 0x45, 0x52, 0x5f, 0x42, 0x59, 0x5f, 0x55, 0x4e, 0x52, + 0x45, 0x4c, 0x41, 0x54, 0x45, 0x44, 0x10, 0x89, 0x04, 0x12, 0x1b, 0x0a, 0x16, 0x53, 0x51, 0x4c, + 0x5f, 0x53, 0x55, 0x50, 0x50, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x5f, 0x47, 0x52, 0x4f, 0x55, 0x50, + 0x5f, 0x42, 0x59, 0x10, 0x8a, 0x04, 0x12, 0x24, 0x0a, 0x1f, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x55, + 0x50, 0x50, 0x4f, 0x52, 0x54, 0x53, 0x5f, 0x4c, 0x49, 0x4b, 0x45, 0x5f, 0x45, 0x53, 0x43, 0x41, + 0x50, 0x45, 0x5f, 0x43, 0x4c, 0x41, 0x55, 0x53, 0x45, 0x10, 0x8b, 0x04, 0x12, 0x26, 0x0a, 0x21, + 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x55, 0x50, 0x50, 0x4f, 0x52, 0x54, 0x53, 0x5f, 0x4e, 0x4f, 0x4e, + 0x5f, 0x4e, 0x55, 0x4c, 0x4c, 0x41, 0x42, 0x4c, 0x45, 0x5f, 0x43, 0x4f, 0x4c, 0x55, 0x4d, 0x4e, + 0x53, 0x10, 0x8c, 0x04, 0x12, 0x1a, 0x0a, 0x15, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x55, 0x50, 0x50, + 0x4f, 0x52, 0x54, 0x45, 0x44, 0x5f, 0x47, 0x52, 0x41, 0x4d, 0x4d, 0x41, 0x52, 0x10, 0x8d, 0x04, + 0x12, 0x1f, 0x0a, 0x1a, 0x53, 0x51, 0x4c, 0x5f, 0x41, 0x4e, 0x53, 0x49, 0x39, 0x32, 0x5f, 0x53, + 0x55, 0x50, 0x50, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x5f, 0x4c, 0x45, 0x56, 0x45, 0x4c, 0x10, 0x8e, + 0x04, 0x12, 0x30, 0x0a, 0x2b, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x55, 0x50, 0x50, 0x4f, 0x52, 0x54, + 0x53, 0x5f, 0x49, 0x4e, 0x54, 0x45, 0x47, 0x52, 0x49, 0x54, 0x59, 0x5f, 0x45, 0x4e, 0x48, 0x41, + 0x4e, 0x43, 0x45, 0x4d, 0x45, 0x4e, 0x54, 0x5f, 0x46, 0x41, 0x43, 0x49, 0x4c, 0x49, 0x54, 0x59, + 0x10, 0x8f, 0x04, 0x12, 0x22, 0x0a, 0x1d, 0x53, 0x51, 0x4c, 0x5f, 0x4f, 0x55, 0x54, 0x45, 0x52, + 0x5f, 0x4a, 0x4f, 0x49, 0x4e, 0x53, 0x5f, 0x53, 0x55, 0x50, 0x50, 0x4f, 0x52, 0x54, 0x5f, 0x4c, + 0x45, 0x56, 0x45, 0x4c, 0x10, 0x90, 0x04, 0x12, 0x14, 0x0a, 0x0f, 0x53, 0x51, 0x4c, 0x5f, 0x53, + 0x43, 0x48, 0x45, 0x4d, 0x41, 0x5f, 0x54, 0x45, 0x52, 0x4d, 0x10, 0x91, 0x04, 0x12, 0x17, 0x0a, + 0x12, 0x53, 0x51, 0x4c, 0x5f, 0x50, 0x52, 0x4f, 0x43, 0x45, 0x44, 0x55, 0x52, 0x45, 0x5f, 0x54, + 0x45, 0x52, 0x4d, 0x10, 0x92, 0x04, 0x12, 0x15, 0x0a, 0x10, 0x53, 0x51, 0x4c, 0x5f, 0x43, 0x41, + 0x54, 0x41, 0x4c, 0x4f, 0x47, 0x5f, 0x54, 0x45, 0x52, 0x4d, 0x10, 0x93, 0x04, 0x12, 0x19, 0x0a, + 0x14, 0x53, 0x51, 0x4c, 0x5f, 0x43, 0x41, 0x54, 0x41, 0x4c, 0x4f, 0x47, 0x5f, 0x41, 0x54, 0x5f, + 0x53, 0x54, 0x41, 0x52, 0x54, 0x10, 
0x94, 0x04, 0x12, 0x22, 0x0a, 0x1d, 0x53, 0x51, 0x4c, 0x5f, + 0x53, 0x43, 0x48, 0x45, 0x4d, 0x41, 0x53, 0x5f, 0x53, 0x55, 0x50, 0x50, 0x4f, 0x52, 0x54, 0x45, + 0x44, 0x5f, 0x41, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x53, 0x10, 0x95, 0x04, 0x12, 0x23, 0x0a, 0x1e, + 0x53, 0x51, 0x4c, 0x5f, 0x43, 0x41, 0x54, 0x41, 0x4c, 0x4f, 0x47, 0x53, 0x5f, 0x53, 0x55, 0x50, + 0x50, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x5f, 0x41, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x53, 0x10, 0x96, + 0x04, 0x12, 0x26, 0x0a, 0x21, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x55, 0x50, 0x50, 0x4f, 0x52, 0x54, + 0x45, 0x44, 0x5f, 0x50, 0x4f, 0x53, 0x49, 0x54, 0x49, 0x4f, 0x4e, 0x45, 0x44, 0x5f, 0x43, 0x4f, + 0x4d, 0x4d, 0x41, 0x4e, 0x44, 0x53, 0x10, 0x97, 0x04, 0x12, 0x24, 0x0a, 0x1f, 0x53, 0x51, 0x4c, + 0x5f, 0x53, 0x45, 0x4c, 0x45, 0x43, 0x54, 0x5f, 0x46, 0x4f, 0x52, 0x5f, 0x55, 0x50, 0x44, 0x41, + 0x54, 0x45, 0x5f, 0x53, 0x55, 0x50, 0x50, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x10, 0x98, 0x04, 0x12, + 0x24, 0x0a, 0x1f, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x54, 0x4f, 0x52, 0x45, 0x44, 0x5f, 0x50, 0x52, + 0x4f, 0x43, 0x45, 0x44, 0x55, 0x52, 0x45, 0x53, 0x5f, 0x53, 0x55, 0x50, 0x50, 0x4f, 0x52, 0x54, + 0x45, 0x44, 0x10, 0x99, 0x04, 0x12, 0x1d, 0x0a, 0x18, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x55, 0x50, + 0x50, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x5f, 0x53, 0x55, 0x42, 0x51, 0x55, 0x45, 0x52, 0x49, 0x45, + 0x53, 0x10, 0x9a, 0x04, 0x12, 0x28, 0x0a, 0x23, 0x53, 0x51, 0x4c, 0x5f, 0x43, 0x4f, 0x52, 0x52, + 0x45, 0x4c, 0x41, 0x54, 0x45, 0x44, 0x5f, 0x53, 0x55, 0x42, 0x51, 0x55, 0x45, 0x52, 0x49, 0x45, + 0x53, 0x5f, 0x53, 0x55, 0x50, 0x50, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x10, 0x9b, 0x04, 0x12, 0x19, + 0x0a, 0x14, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x55, 0x50, 0x50, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x5f, + 0x55, 0x4e, 0x49, 0x4f, 0x4e, 0x53, 0x10, 0x9c, 0x04, 0x12, 0x22, 0x0a, 0x1d, 0x53, 0x51, 0x4c, + 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x42, 0x49, 0x4e, 0x41, 0x52, 0x59, 0x5f, 0x4c, 0x49, 0x54, 0x45, + 0x52, 0x41, 0x4c, 0x5f, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48, 0x10, 0x9d, 0x04, 0x12, 0x20, 0x0a, + 0x1b, 0x53, 0x51, 0x4c, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x43, 0x48, 0x41, 0x52, 0x5f, 0x4c, 0x49, + 0x54, 0x45, 0x52, 0x41, 0x4c, 0x5f, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48, 0x10, 0x9e, 0x04, 0x12, + 0x1f, 0x0a, 0x1a, 0x53, 0x51, 0x4c, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x43, 0x4f, 0x4c, 0x55, 0x4d, + 0x4e, 0x5f, 0x4e, 0x41, 0x4d, 0x45, 0x5f, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48, 0x10, 0x9f, 0x04, + 0x12, 0x20, 0x0a, 0x1b, 0x53, 0x51, 0x4c, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x43, 0x4f, 0x4c, 0x55, + 0x4d, 0x4e, 0x53, 0x5f, 0x49, 0x4e, 0x5f, 0x47, 0x52, 0x4f, 0x55, 0x50, 0x5f, 0x42, 0x59, 0x10, + 0xa0, 0x04, 0x12, 0x1d, 0x0a, 0x18, 0x53, 0x51, 0x4c, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x43, 0x4f, + 0x4c, 0x55, 0x4d, 0x4e, 0x53, 0x5f, 0x49, 0x4e, 0x5f, 0x49, 0x4e, 0x44, 0x45, 0x58, 0x10, 0xa1, + 0x04, 0x12, 0x20, 0x0a, 0x1b, 0x53, 0x51, 0x4c, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x43, 0x4f, 0x4c, + 0x55, 0x4d, 0x4e, 0x53, 0x5f, 0x49, 0x4e, 0x5f, 0x4f, 0x52, 0x44, 0x45, 0x52, 0x5f, 0x42, 0x59, + 0x10, 0xa2, 0x04, 0x12, 0x1e, 0x0a, 0x19, 0x53, 0x51, 0x4c, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x43, + 0x4f, 0x4c, 0x55, 0x4d, 0x4e, 0x53, 0x5f, 0x49, 0x4e, 0x5f, 0x53, 0x45, 0x4c, 0x45, 0x43, 0x54, + 0x10, 0xa3, 0x04, 0x12, 0x1d, 0x0a, 0x18, 0x53, 0x51, 0x4c, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x43, + 0x4f, 0x4c, 0x55, 0x4d, 0x4e, 0x53, 0x5f, 0x49, 0x4e, 0x5f, 0x54, 0x41, 0x42, 0x4c, 0x45, 0x10, + 0xa4, 0x04, 0x12, 0x18, 0x0a, 0x13, 0x53, 0x51, 0x4c, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x43, 0x4f, + 0x4e, 0x4e, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x53, 0x10, 
0xa5, 0x04, 0x12, 0x1f, 0x0a, 0x1a, + 0x53, 0x51, 0x4c, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x43, 0x55, 0x52, 0x53, 0x4f, 0x52, 0x5f, 0x4e, + 0x41, 0x4d, 0x45, 0x5f, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48, 0x10, 0xa6, 0x04, 0x12, 0x19, 0x0a, + 0x14, 0x53, 0x51, 0x4c, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x49, 0x4e, 0x44, 0x45, 0x58, 0x5f, 0x4c, + 0x45, 0x4e, 0x47, 0x54, 0x48, 0x10, 0xa7, 0x04, 0x12, 0x1e, 0x0a, 0x19, 0x53, 0x51, 0x4c, 0x5f, + 0x44, 0x42, 0x5f, 0x53, 0x43, 0x48, 0x45, 0x4d, 0x41, 0x5f, 0x4e, 0x41, 0x4d, 0x45, 0x5f, 0x4c, + 0x45, 0x4e, 0x47, 0x54, 0x48, 0x10, 0xa8, 0x04, 0x12, 0x22, 0x0a, 0x1d, 0x53, 0x51, 0x4c, 0x5f, + 0x4d, 0x41, 0x58, 0x5f, 0x50, 0x52, 0x4f, 0x43, 0x45, 0x44, 0x55, 0x52, 0x45, 0x5f, 0x4e, 0x41, + 0x4d, 0x45, 0x5f, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48, 0x10, 0xa9, 0x04, 0x12, 0x20, 0x0a, 0x1b, + 0x53, 0x51, 0x4c, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x43, 0x41, 0x54, 0x41, 0x4c, 0x4f, 0x47, 0x5f, + 0x4e, 0x41, 0x4d, 0x45, 0x5f, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48, 0x10, 0xaa, 0x04, 0x12, 0x15, + 0x0a, 0x10, 0x53, 0x51, 0x4c, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x52, 0x4f, 0x57, 0x5f, 0x53, 0x49, + 0x5a, 0x45, 0x10, 0xab, 0x04, 0x12, 0x24, 0x0a, 0x1f, 0x53, 0x51, 0x4c, 0x5f, 0x4d, 0x41, 0x58, + 0x5f, 0x52, 0x4f, 0x57, 0x5f, 0x53, 0x49, 0x5a, 0x45, 0x5f, 0x49, 0x4e, 0x43, 0x4c, 0x55, 0x44, + 0x45, 0x53, 0x5f, 0x42, 0x4c, 0x4f, 0x42, 0x53, 0x10, 0xac, 0x04, 0x12, 0x1d, 0x0a, 0x18, 0x53, + 0x51, 0x4c, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x53, 0x54, 0x41, 0x54, 0x45, 0x4d, 0x45, 0x4e, 0x54, + 0x5f, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48, 0x10, 0xad, 0x04, 0x12, 0x17, 0x0a, 0x12, 0x53, 0x51, + 0x4c, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x53, 0x54, 0x41, 0x54, 0x45, 0x4d, 0x45, 0x4e, 0x54, 0x53, + 0x10, 0xae, 0x04, 0x12, 0x1e, 0x0a, 0x19, 0x53, 0x51, 0x4c, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x54, + 0x41, 0x42, 0x4c, 0x45, 0x5f, 0x4e, 0x41, 0x4d, 0x45, 0x5f, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48, + 0x10, 0xaf, 0x04, 0x12, 0x1d, 0x0a, 0x18, 0x53, 0x51, 0x4c, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x54, + 0x41, 0x42, 0x4c, 0x45, 0x53, 0x5f, 0x49, 0x4e, 0x5f, 0x53, 0x45, 0x4c, 0x45, 0x43, 0x54, 0x10, + 0xb0, 0x04, 0x12, 0x1c, 0x0a, 0x17, 0x53, 0x51, 0x4c, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x55, 0x53, + 0x45, 0x52, 0x4e, 0x41, 0x4d, 0x45, 0x5f, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48, 0x10, 0xb1, 0x04, + 0x12, 0x26, 0x0a, 0x21, 0x53, 0x51, 0x4c, 0x5f, 0x44, 0x45, 0x46, 0x41, 0x55, 0x4c, 0x54, 0x5f, + 0x54, 0x52, 0x41, 0x4e, 0x53, 0x41, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x49, 0x53, 0x4f, 0x4c, + 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x10, 0xb2, 0x04, 0x12, 0x1f, 0x0a, 0x1a, 0x53, 0x51, 0x4c, 0x5f, + 0x54, 0x52, 0x41, 0x4e, 0x53, 0x41, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x53, 0x5f, 0x53, 0x55, 0x50, + 0x50, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x10, 0xb3, 0x04, 0x12, 0x30, 0x0a, 0x2b, 0x53, 0x51, 0x4c, + 0x5f, 0x53, 0x55, 0x50, 0x50, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x5f, 0x54, 0x52, 0x41, 0x4e, 0x53, + 0x41, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x53, 0x5f, 0x49, 0x53, 0x4f, 0x4c, 0x41, 0x54, 0x49, 0x4f, + 0x4e, 0x5f, 0x4c, 0x45, 0x56, 0x45, 0x4c, 0x53, 0x10, 0xb4, 0x04, 0x12, 0x32, 0x0a, 0x2d, 0x53, + 0x51, 0x4c, 0x5f, 0x44, 0x41, 0x54, 0x41, 0x5f, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x49, 0x54, 0x49, + 0x4f, 0x4e, 0x5f, 0x43, 0x41, 0x55, 0x53, 0x45, 0x53, 0x5f, 0x54, 0x52, 0x41, 0x4e, 0x53, 0x41, + 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x43, 0x4f, 0x4d, 0x4d, 0x49, 0x54, 0x10, 0xb5, 0x04, 0x12, + 0x31, 0x0a, 0x2c, 0x53, 0x51, 0x4c, 0x5f, 0x44, 0x41, 0x54, 0x41, 0x5f, 0x44, 0x45, 0x46, 0x49, + 0x4e, 0x49, 0x54, 0x49, 0x4f, 0x4e, 0x53, 0x5f, 0x49, 0x4e, 0x5f, 0x54, 0x52, 0x41, 
0x4e, 0x53, + 0x41, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x53, 0x5f, 0x49, 0x47, 0x4e, 0x4f, 0x52, 0x45, 0x44, 0x10, + 0xb6, 0x04, 0x12, 0x23, 0x0a, 0x1e, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x55, 0x50, 0x50, 0x4f, 0x52, + 0x54, 0x45, 0x44, 0x5f, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x53, 0x45, 0x54, 0x5f, 0x54, + 0x59, 0x50, 0x45, 0x53, 0x10, 0xb7, 0x04, 0x12, 0x3b, 0x0a, 0x36, 0x53, 0x51, 0x4c, 0x5f, 0x53, + 0x55, 0x50, 0x50, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x5f, 0x43, 0x4f, 0x4e, 0x43, 0x55, 0x52, 0x52, + 0x45, 0x4e, 0x43, 0x49, 0x45, 0x53, 0x5f, 0x46, 0x4f, 0x52, 0x5f, 0x52, 0x45, 0x53, 0x55, 0x4c, + 0x54, 0x5f, 0x53, 0x45, 0x54, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, + 0x44, 0x10, 0xb8, 0x04, 0x12, 0x3c, 0x0a, 0x37, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x55, 0x50, 0x50, + 0x4f, 0x52, 0x54, 0x45, 0x44, 0x5f, 0x43, 0x4f, 0x4e, 0x43, 0x55, 0x52, 0x52, 0x45, 0x4e, 0x43, + 0x49, 0x45, 0x53, 0x5f, 0x46, 0x4f, 0x52, 0x5f, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x53, + 0x45, 0x54, 0x5f, 0x46, 0x4f, 0x52, 0x57, 0x41, 0x52, 0x44, 0x5f, 0x4f, 0x4e, 0x4c, 0x59, 0x10, + 0xb9, 0x04, 0x12, 0x40, 0x0a, 0x3b, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x55, 0x50, 0x50, 0x4f, 0x52, + 0x54, 0x45, 0x44, 0x5f, 0x43, 0x4f, 0x4e, 0x43, 0x55, 0x52, 0x52, 0x45, 0x4e, 0x43, 0x49, 0x45, + 0x53, 0x5f, 0x46, 0x4f, 0x52, 0x5f, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x53, 0x45, 0x54, + 0x5f, 0x53, 0x43, 0x52, 0x4f, 0x4c, 0x4c, 0x5f, 0x53, 0x45, 0x4e, 0x53, 0x49, 0x54, 0x49, 0x56, + 0x45, 0x10, 0xba, 0x04, 0x12, 0x42, 0x0a, 0x3d, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x55, 0x50, 0x50, + 0x4f, 0x52, 0x54, 0x45, 0x44, 0x5f, 0x43, 0x4f, 0x4e, 0x43, 0x55, 0x52, 0x52, 0x45, 0x4e, 0x43, + 0x49, 0x45, 0x53, 0x5f, 0x46, 0x4f, 0x52, 0x5f, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x53, + 0x45, 0x54, 0x5f, 0x53, 0x43, 0x52, 0x4f, 0x4c, 0x4c, 0x5f, 0x49, 0x4e, 0x53, 0x45, 0x4e, 0x53, + 0x49, 0x54, 0x49, 0x56, 0x45, 0x10, 0xbb, 0x04, 0x12, 0x20, 0x0a, 0x1b, 0x53, 0x51, 0x4c, 0x5f, + 0x42, 0x41, 0x54, 0x43, 0x48, 0x5f, 0x55, 0x50, 0x44, 0x41, 0x54, 0x45, 0x53, 0x5f, 0x53, 0x55, + 0x50, 0x50, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x10, 0xbc, 0x04, 0x12, 0x1d, 0x0a, 0x18, 0x53, 0x51, + 0x4c, 0x5f, 0x53, 0x41, 0x56, 0x45, 0x50, 0x4f, 0x49, 0x4e, 0x54, 0x53, 0x5f, 0x53, 0x55, 0x50, + 0x50, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x10, 0xbd, 0x04, 0x12, 0x23, 0x0a, 0x1e, 0x53, 0x51, 0x4c, + 0x5f, 0x4e, 0x41, 0x4d, 0x45, 0x44, 0x5f, 0x50, 0x41, 0x52, 0x41, 0x4d, 0x45, 0x54, 0x45, 0x52, + 0x53, 0x5f, 0x53, 0x55, 0x50, 0x50, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x10, 0xbe, 0x04, 0x12, 0x1d, + 0x0a, 0x18, 0x53, 0x51, 0x4c, 0x5f, 0x4c, 0x4f, 0x43, 0x41, 0x54, 0x4f, 0x52, 0x53, 0x5f, 0x55, + 0x50, 0x44, 0x41, 0x54, 0x45, 0x5f, 0x43, 0x4f, 0x50, 0x59, 0x10, 0xbf, 0x04, 0x12, 0x35, 0x0a, + 0x30, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x54, 0x4f, 0x52, 0x45, 0x44, 0x5f, 0x46, 0x55, 0x4e, 0x43, + 0x54, 0x49, 0x4f, 0x4e, 0x53, 0x5f, 0x55, 0x53, 0x49, 0x4e, 0x47, 0x5f, 0x43, 0x41, 0x4c, 0x4c, + 0x5f, 0x53, 0x59, 0x4e, 0x54, 0x41, 0x58, 0x5f, 0x53, 0x55, 0x50, 0x50, 0x4f, 0x52, 0x54, 0x45, + 0x44, 0x10, 0xc0, 0x04, 0x2a, 0xb2, 0x01, 0x0a, 0x1b, 0x53, 0x71, 0x6c, 0x53, 0x75, 0x70, 0x70, + 0x6f, 0x72, 0x74, 0x65, 0x64, 0x43, 0x61, 0x73, 0x65, 0x53, 0x65, 0x6e, 0x73, 0x69, 0x74, 0x69, + 0x76, 0x69, 0x74, 0x79, 0x12, 0x20, 0x0a, 0x1c, 0x53, 0x51, 0x4c, 0x5f, 0x43, 0x41, 0x53, 0x45, + 0x5f, 0x53, 0x45, 0x4e, 0x53, 0x49, 0x54, 0x49, 0x56, 0x49, 0x54, 0x59, 0x5f, 0x55, 0x4e, 0x4b, + 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x29, 0x0a, 0x25, 0x53, 0x51, 0x4c, 0x5f, 0x43, 0x41, + 0x53, 0x45, 
0x5f, 0x53, 0x45, 0x4e, 0x53, 0x49, 0x54, 0x49, 0x56, 0x49, 0x54, 0x59, 0x5f, 0x43, + 0x41, 0x53, 0x45, 0x5f, 0x49, 0x4e, 0x53, 0x45, 0x4e, 0x53, 0x49, 0x54, 0x49, 0x56, 0x45, 0x10, + 0x01, 0x12, 0x22, 0x0a, 0x1e, 0x53, 0x51, 0x4c, 0x5f, 0x43, 0x41, 0x53, 0x45, 0x5f, 0x53, 0x45, + 0x4e, 0x53, 0x49, 0x54, 0x49, 0x56, 0x49, 0x54, 0x59, 0x5f, 0x55, 0x50, 0x50, 0x45, 0x52, 0x43, + 0x41, 0x53, 0x45, 0x10, 0x02, 0x12, 0x22, 0x0a, 0x1e, 0x53, 0x51, 0x4c, 0x5f, 0x43, 0x41, 0x53, + 0x45, 0x5f, 0x53, 0x45, 0x4e, 0x53, 0x49, 0x54, 0x49, 0x56, 0x49, 0x54, 0x59, 0x5f, 0x4c, 0x4f, + 0x57, 0x45, 0x52, 0x43, 0x41, 0x53, 0x45, 0x10, 0x03, 0x2a, 0x82, 0x01, 0x0a, 0x0f, 0x53, 0x71, + 0x6c, 0x4e, 0x75, 0x6c, 0x6c, 0x4f, 0x72, 0x64, 0x65, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x19, 0x0a, + 0x15, 0x53, 0x51, 0x4c, 0x5f, 0x4e, 0x55, 0x4c, 0x4c, 0x53, 0x5f, 0x53, 0x4f, 0x52, 0x54, 0x45, + 0x44, 0x5f, 0x48, 0x49, 0x47, 0x48, 0x10, 0x00, 0x12, 0x18, 0x0a, 0x14, 0x53, 0x51, 0x4c, 0x5f, + 0x4e, 0x55, 0x4c, 0x4c, 0x53, 0x5f, 0x53, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x5f, 0x4c, 0x4f, 0x57, + 0x10, 0x01, 0x12, 0x1d, 0x0a, 0x19, 0x53, 0x51, 0x4c, 0x5f, 0x4e, 0x55, 0x4c, 0x4c, 0x53, 0x5f, + 0x53, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x5f, 0x41, 0x54, 0x5f, 0x53, 0x54, 0x41, 0x52, 0x54, 0x10, + 0x02, 0x12, 0x1b, 0x0a, 0x17, 0x53, 0x51, 0x4c, 0x5f, 0x4e, 0x55, 0x4c, 0x4c, 0x53, 0x5f, 0x53, + 0x4f, 0x52, 0x54, 0x45, 0x44, 0x5f, 0x41, 0x54, 0x5f, 0x45, 0x4e, 0x44, 0x10, 0x03, 0x2a, 0x5e, + 0x0a, 0x13, 0x53, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x53, 0x71, 0x6c, 0x47, 0x72, + 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x12, 0x17, 0x0a, 0x13, 0x53, 0x51, 0x4c, 0x5f, 0x4d, 0x49, 0x4e, + 0x49, 0x4d, 0x55, 0x4d, 0x5f, 0x47, 0x52, 0x41, 0x4d, 0x4d, 0x41, 0x52, 0x10, 0x00, 0x12, 0x14, + 0x0a, 0x10, 0x53, 0x51, 0x4c, 0x5f, 0x43, 0x4f, 0x52, 0x45, 0x5f, 0x47, 0x52, 0x41, 0x4d, 0x4d, + 0x41, 0x52, 0x10, 0x01, 0x12, 0x18, 0x0a, 0x14, 0x53, 0x51, 0x4c, 0x5f, 0x45, 0x58, 0x54, 0x45, + 0x4e, 0x44, 0x45, 0x44, 0x5f, 0x47, 0x52, 0x41, 0x4d, 0x4d, 0x41, 0x52, 0x10, 0x02, 0x2a, 0x68, + 0x0a, 0x1e, 0x53, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x41, 0x6e, 0x73, 0x69, 0x39, + 0x32, 0x53, 0x71, 0x6c, 0x47, 0x72, 0x61, 0x6d, 0x6d, 0x61, 0x72, 0x4c, 0x65, 0x76, 0x65, 0x6c, + 0x12, 0x14, 0x0a, 0x10, 0x41, 0x4e, 0x53, 0x49, 0x39, 0x32, 0x5f, 0x45, 0x4e, 0x54, 0x52, 0x59, + 0x5f, 0x53, 0x51, 0x4c, 0x10, 0x00, 0x12, 0x1b, 0x0a, 0x17, 0x41, 0x4e, 0x53, 0x49, 0x39, 0x32, + 0x5f, 0x49, 0x4e, 0x54, 0x45, 0x52, 0x4d, 0x45, 0x44, 0x49, 0x41, 0x54, 0x45, 0x5f, 0x53, 0x51, + 0x4c, 0x10, 0x01, 0x12, 0x13, 0x0a, 0x0f, 0x41, 0x4e, 0x53, 0x49, 0x39, 0x32, 0x5f, 0x46, 0x55, + 0x4c, 0x4c, 0x5f, 0x53, 0x51, 0x4c, 0x10, 0x02, 0x2a, 0x6d, 0x0a, 0x19, 0x53, 0x71, 0x6c, 0x4f, + 0x75, 0x74, 0x65, 0x72, 0x4a, 0x6f, 0x69, 0x6e, 0x73, 0x53, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, + 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x12, 0x19, 0x0a, 0x15, 0x53, 0x51, 0x4c, 0x5f, 0x4a, 0x4f, 0x49, + 0x4e, 0x53, 0x5f, 0x55, 0x4e, 0x53, 0x55, 0x50, 0x50, 0x4f, 0x52, 0x54, 0x45, 0x44, 0x10, 0x00, + 0x12, 0x1b, 0x0a, 0x17, 0x53, 0x51, 0x4c, 0x5f, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x45, 0x44, 0x5f, + 0x4f, 0x55, 0x54, 0x45, 0x52, 0x5f, 0x4a, 0x4f, 0x49, 0x4e, 0x53, 0x10, 0x01, 0x12, 0x18, 0x0a, + 0x14, 0x53, 0x51, 0x4c, 0x5f, 0x46, 0x55, 0x4c, 0x4c, 0x5f, 0x4f, 0x55, 0x54, 0x45, 0x52, 0x5f, + 0x4a, 0x4f, 0x49, 0x4e, 0x53, 0x10, 0x02, 0x2a, 0x51, 0x0a, 0x13, 0x53, 0x71, 0x6c, 0x53, 0x75, + 0x70, 0x70, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x42, 0x79, 0x12, 0x1a, + 0x0a, 0x16, 0x53, 0x51, 0x4c, 0x5f, 
0x47, 0x52, 0x4f, 0x55, 0x50, 0x5f, 0x42, 0x59, 0x5f, 0x55, + 0x4e, 0x52, 0x45, 0x4c, 0x41, 0x54, 0x45, 0x44, 0x10, 0x00, 0x12, 0x1e, 0x0a, 0x1a, 0x53, 0x51, + 0x4c, 0x5f, 0x47, 0x52, 0x4f, 0x55, 0x50, 0x5f, 0x42, 0x59, 0x5f, 0x42, 0x45, 0x59, 0x4f, 0x4e, + 0x44, 0x5f, 0x53, 0x45, 0x4c, 0x45, 0x43, 0x54, 0x10, 0x01, 0x2a, 0x90, 0x01, 0x0a, 0x1a, 0x53, + 0x71, 0x6c, 0x53, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x45, 0x6c, 0x65, 0x6d, 0x65, + 0x6e, 0x74, 0x41, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x22, 0x0a, 0x1e, 0x53, 0x51, 0x4c, + 0x5f, 0x45, 0x4c, 0x45, 0x4d, 0x45, 0x4e, 0x54, 0x5f, 0x49, 0x4e, 0x5f, 0x50, 0x52, 0x4f, 0x43, + 0x45, 0x44, 0x55, 0x52, 0x45, 0x5f, 0x43, 0x41, 0x4c, 0x4c, 0x53, 0x10, 0x00, 0x12, 0x24, 0x0a, + 0x20, 0x53, 0x51, 0x4c, 0x5f, 0x45, 0x4c, 0x45, 0x4d, 0x45, 0x4e, 0x54, 0x5f, 0x49, 0x4e, 0x5f, + 0x49, 0x4e, 0x44, 0x45, 0x58, 0x5f, 0x44, 0x45, 0x46, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x4f, 0x4e, + 0x53, 0x10, 0x01, 0x12, 0x28, 0x0a, 0x24, 0x53, 0x51, 0x4c, 0x5f, 0x45, 0x4c, 0x45, 0x4d, 0x45, + 0x4e, 0x54, 0x5f, 0x49, 0x4e, 0x5f, 0x50, 0x52, 0x49, 0x56, 0x49, 0x4c, 0x45, 0x47, 0x45, 0x5f, + 0x44, 0x45, 0x46, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x4f, 0x4e, 0x53, 0x10, 0x02, 0x2a, 0x56, 0x0a, + 0x1e, 0x53, 0x71, 0x6c, 0x53, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x50, 0x6f, 0x73, + 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x65, 0x64, 0x43, 0x6f, 0x6d, 0x6d, 0x61, 0x6e, 0x64, 0x73, 0x12, + 0x19, 0x0a, 0x15, 0x53, 0x51, 0x4c, 0x5f, 0x50, 0x4f, 0x53, 0x49, 0x54, 0x49, 0x4f, 0x4e, 0x45, + 0x44, 0x5f, 0x44, 0x45, 0x4c, 0x45, 0x54, 0x45, 0x10, 0x00, 0x12, 0x19, 0x0a, 0x15, 0x53, 0x51, + 0x4c, 0x5f, 0x50, 0x4f, 0x53, 0x49, 0x54, 0x49, 0x4f, 0x4e, 0x45, 0x44, 0x5f, 0x55, 0x50, 0x44, + 0x41, 0x54, 0x45, 0x10, 0x01, 0x2a, 0x97, 0x01, 0x0a, 0x16, 0x53, 0x71, 0x6c, 0x53, 0x75, 0x70, + 0x70, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x53, 0x75, 0x62, 0x71, 0x75, 0x65, 0x72, 0x69, 0x65, 0x73, + 0x12, 0x21, 0x0a, 0x1d, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x55, 0x42, 0x51, 0x55, 0x45, 0x52, 0x49, + 0x45, 0x53, 0x5f, 0x49, 0x4e, 0x5f, 0x43, 0x4f, 0x4d, 0x50, 0x41, 0x52, 0x49, 0x53, 0x4f, 0x4e, + 0x53, 0x10, 0x00, 0x12, 0x1c, 0x0a, 0x18, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x55, 0x42, 0x51, 0x55, + 0x45, 0x52, 0x49, 0x45, 0x53, 0x5f, 0x49, 0x4e, 0x5f, 0x45, 0x58, 0x49, 0x53, 0x54, 0x53, 0x10, + 0x01, 0x12, 0x19, 0x0a, 0x15, 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x55, 0x42, 0x51, 0x55, 0x45, 0x52, + 0x49, 0x45, 0x53, 0x5f, 0x49, 0x4e, 0x5f, 0x49, 0x4e, 0x53, 0x10, 0x02, 0x12, 0x21, 0x0a, 0x1d, + 0x53, 0x51, 0x4c, 0x5f, 0x53, 0x55, 0x42, 0x51, 0x55, 0x45, 0x52, 0x49, 0x45, 0x53, 0x5f, 0x49, + 0x4e, 0x5f, 0x51, 0x55, 0x41, 0x4e, 0x54, 0x49, 0x46, 0x49, 0x45, 0x44, 0x53, 0x10, 0x03, 0x2a, + 0x36, 0x0a, 0x12, 0x53, 0x71, 0x6c, 0x53, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x55, + 0x6e, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x0d, 0x0a, 0x09, 0x53, 0x51, 0x4c, 0x5f, 0x55, 0x4e, 0x49, + 0x4f, 0x4e, 0x10, 0x00, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x51, 0x4c, 0x5f, 0x55, 0x4e, 0x49, 0x4f, + 0x4e, 0x5f, 0x41, 0x4c, 0x4c, 0x10, 0x01, 0x2a, 0xc9, 0x01, 0x0a, 0x1c, 0x53, 0x71, 0x6c, 0x54, + 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x73, 0x6f, 0x6c, 0x61, 0x74, + 0x69, 0x6f, 0x6e, 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x12, 0x18, 0x0a, 0x14, 0x53, 0x51, 0x4c, 0x5f, + 0x54, 0x52, 0x41, 0x4e, 0x53, 0x41, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x4e, 0x4f, 0x4e, 0x45, + 0x10, 0x00, 0x12, 0x24, 0x0a, 0x20, 0x53, 0x51, 0x4c, 0x5f, 0x54, 0x52, 0x41, 0x4e, 0x53, 0x41, + 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x52, 0x45, 0x41, 0x44, 
0x5f, 0x55, 0x4e, 0x43, 0x4f, 0x4d, + 0x4d, 0x49, 0x54, 0x54, 0x45, 0x44, 0x10, 0x01, 0x12, 0x22, 0x0a, 0x1e, 0x53, 0x51, 0x4c, 0x5f, + 0x54, 0x52, 0x41, 0x4e, 0x53, 0x41, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x52, 0x45, 0x41, 0x44, + 0x5f, 0x43, 0x4f, 0x4d, 0x4d, 0x49, 0x54, 0x54, 0x45, 0x44, 0x10, 0x02, 0x12, 0x23, 0x0a, 0x1f, + 0x53, 0x51, 0x4c, 0x5f, 0x54, 0x52, 0x41, 0x4e, 0x53, 0x41, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x5f, + 0x52, 0x45, 0x50, 0x45, 0x41, 0x54, 0x41, 0x42, 0x4c, 0x45, 0x5f, 0x52, 0x45, 0x41, 0x44, 0x10, + 0x03, 0x12, 0x20, 0x0a, 0x1c, 0x53, 0x51, 0x4c, 0x5f, 0x54, 0x52, 0x41, 0x4e, 0x53, 0x41, 0x43, + 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x53, 0x45, 0x52, 0x49, 0x41, 0x4c, 0x49, 0x5a, 0x41, 0x42, 0x4c, + 0x45, 0x10, 0x04, 0x2a, 0x89, 0x01, 0x0a, 0x18, 0x53, 0x71, 0x6c, 0x53, 0x75, 0x70, 0x70, 0x6f, + 0x72, 0x74, 0x65, 0x64, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x12, 0x1f, 0x0a, 0x1b, 0x53, 0x51, 0x4c, 0x5f, 0x54, 0x52, 0x41, 0x4e, 0x53, 0x41, 0x43, 0x54, + 0x49, 0x4f, 0x4e, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, + 0x00, 0x12, 0x24, 0x0a, 0x20, 0x53, 0x51, 0x4c, 0x5f, 0x44, 0x41, 0x54, 0x41, 0x5f, 0x44, 0x45, + 0x46, 0x49, 0x4e, 0x49, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x54, 0x52, 0x41, 0x4e, 0x53, 0x41, 0x43, + 0x54, 0x49, 0x4f, 0x4e, 0x53, 0x10, 0x01, 0x12, 0x26, 0x0a, 0x22, 0x53, 0x51, 0x4c, 0x5f, 0x44, + 0x41, 0x54, 0x41, 0x5f, 0x4d, 0x41, 0x4e, 0x49, 0x50, 0x55, 0x4c, 0x41, 0x54, 0x49, 0x4f, 0x4e, + 0x5f, 0x54, 0x52, 0x41, 0x4e, 0x53, 0x41, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x53, 0x10, 0x02, 0x2a, + 0xbc, 0x01, 0x0a, 0x19, 0x53, 0x71, 0x6c, 0x53, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x65, 0x64, + 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x53, 0x65, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x23, 0x0a, + 0x1f, 0x53, 0x51, 0x4c, 0x5f, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x53, 0x45, 0x54, 0x5f, + 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, + 0x10, 0x00, 0x12, 0x24, 0x0a, 0x20, 0x53, 0x51, 0x4c, 0x5f, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, + 0x5f, 0x53, 0x45, 0x54, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x46, 0x4f, 0x52, 0x57, 0x41, 0x52, + 0x44, 0x5f, 0x4f, 0x4e, 0x4c, 0x59, 0x10, 0x01, 0x12, 0x2a, 0x0a, 0x26, 0x53, 0x51, 0x4c, 0x5f, + 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x53, 0x45, 0x54, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, + 0x53, 0x43, 0x52, 0x4f, 0x4c, 0x4c, 0x5f, 0x49, 0x4e, 0x53, 0x45, 0x4e, 0x53, 0x49, 0x54, 0x49, + 0x56, 0x45, 0x10, 0x02, 0x12, 0x28, 0x0a, 0x24, 0x53, 0x51, 0x4c, 0x5f, 0x52, 0x45, 0x53, 0x55, + 0x4c, 0x54, 0x5f, 0x53, 0x45, 0x54, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x53, 0x43, 0x52, 0x4f, + 0x4c, 0x4c, 0x5f, 0x53, 0x45, 0x4e, 0x53, 0x49, 0x54, 0x49, 0x56, 0x45, 0x10, 0x03, 0x2a, 0xa2, + 0x01, 0x0a, 0x20, 0x53, 0x71, 0x6c, 0x53, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x52, + 0x65, 0x73, 0x75, 0x6c, 0x74, 0x53, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x63, 0x75, 0x72, 0x72, 0x65, + 0x6e, 0x63, 0x79, 0x12, 0x2a, 0x0a, 0x26, 0x53, 0x51, 0x4c, 0x5f, 0x52, 0x45, 0x53, 0x55, 0x4c, + 0x54, 0x5f, 0x53, 0x45, 0x54, 0x5f, 0x43, 0x4f, 0x4e, 0x43, 0x55, 0x52, 0x52, 0x45, 0x4e, 0x43, + 0x59, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, + 0x28, 0x0a, 0x24, 0x53, 0x51, 0x4c, 0x5f, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x53, 0x45, + 0x54, 0x5f, 0x43, 0x4f, 0x4e, 0x43, 0x55, 0x52, 0x52, 0x45, 0x4e, 0x43, 0x59, 0x5f, 0x52, 0x45, + 0x41, 0x44, 0x5f, 0x4f, 0x4e, 0x4c, 0x59, 0x10, 0x01, 0x12, 0x28, 0x0a, 0x24, 0x53, 
0x51, 0x4c, + 0x5f, 0x52, 0x45, 0x53, 0x55, 0x4c, 0x54, 0x5f, 0x53, 0x45, 0x54, 0x5f, 0x43, 0x4f, 0x4e, 0x43, + 0x55, 0x52, 0x52, 0x45, 0x4e, 0x43, 0x59, 0x5f, 0x55, 0x50, 0x44, 0x41, 0x54, 0x41, 0x42, 0x4c, + 0x45, 0x10, 0x02, 0x2a, 0x99, 0x04, 0x0a, 0x12, 0x53, 0x71, 0x6c, 0x53, 0x75, 0x70, 0x70, 0x6f, + 0x72, 0x74, 0x73, 0x43, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74, 0x12, 0x16, 0x0a, 0x12, 0x53, 0x51, + 0x4c, 0x5f, 0x43, 0x4f, 0x4e, 0x56, 0x45, 0x52, 0x54, 0x5f, 0x42, 0x49, 0x47, 0x49, 0x4e, 0x54, + 0x10, 0x00, 0x12, 0x16, 0x0a, 0x12, 0x53, 0x51, 0x4c, 0x5f, 0x43, 0x4f, 0x4e, 0x56, 0x45, 0x52, + 0x54, 0x5f, 0x42, 0x49, 0x4e, 0x41, 0x52, 0x59, 0x10, 0x01, 0x12, 0x13, 0x0a, 0x0f, 0x53, 0x51, + 0x4c, 0x5f, 0x43, 0x4f, 0x4e, 0x56, 0x45, 0x52, 0x54, 0x5f, 0x42, 0x49, 0x54, 0x10, 0x02, 0x12, + 0x14, 0x0a, 0x10, 0x53, 0x51, 0x4c, 0x5f, 0x43, 0x4f, 0x4e, 0x56, 0x45, 0x52, 0x54, 0x5f, 0x43, + 0x48, 0x41, 0x52, 0x10, 0x03, 0x12, 0x14, 0x0a, 0x10, 0x53, 0x51, 0x4c, 0x5f, 0x43, 0x4f, 0x4e, + 0x56, 0x45, 0x52, 0x54, 0x5f, 0x44, 0x41, 0x54, 0x45, 0x10, 0x04, 0x12, 0x17, 0x0a, 0x13, 0x53, + 0x51, 0x4c, 0x5f, 0x43, 0x4f, 0x4e, 0x56, 0x45, 0x52, 0x54, 0x5f, 0x44, 0x45, 0x43, 0x49, 0x4d, + 0x41, 0x4c, 0x10, 0x05, 0x12, 0x15, 0x0a, 0x11, 0x53, 0x51, 0x4c, 0x5f, 0x43, 0x4f, 0x4e, 0x56, + 0x45, 0x52, 0x54, 0x5f, 0x46, 0x4c, 0x4f, 0x41, 0x54, 0x10, 0x06, 0x12, 0x17, 0x0a, 0x13, 0x53, + 0x51, 0x4c, 0x5f, 0x43, 0x4f, 0x4e, 0x56, 0x45, 0x52, 0x54, 0x5f, 0x49, 0x4e, 0x54, 0x45, 0x47, + 0x45, 0x52, 0x10, 0x07, 0x12, 0x21, 0x0a, 0x1d, 0x53, 0x51, 0x4c, 0x5f, 0x43, 0x4f, 0x4e, 0x56, + 0x45, 0x52, 0x54, 0x5f, 0x49, 0x4e, 0x54, 0x45, 0x52, 0x56, 0x41, 0x4c, 0x5f, 0x44, 0x41, 0x59, + 0x5f, 0x54, 0x49, 0x4d, 0x45, 0x10, 0x08, 0x12, 0x23, 0x0a, 0x1f, 0x53, 0x51, 0x4c, 0x5f, 0x43, + 0x4f, 0x4e, 0x56, 0x45, 0x52, 0x54, 0x5f, 0x49, 0x4e, 0x54, 0x45, 0x52, 0x56, 0x41, 0x4c, 0x5f, + 0x59, 0x45, 0x41, 0x52, 0x5f, 0x4d, 0x4f, 0x4e, 0x54, 0x48, 0x10, 0x09, 0x12, 0x1d, 0x0a, 0x19, + 0x53, 0x51, 0x4c, 0x5f, 0x43, 0x4f, 0x4e, 0x56, 0x45, 0x52, 0x54, 0x5f, 0x4c, 0x4f, 0x4e, 0x47, + 0x56, 0x41, 0x52, 0x42, 0x49, 0x4e, 0x41, 0x52, 0x59, 0x10, 0x0a, 0x12, 0x1b, 0x0a, 0x17, 0x53, + 0x51, 0x4c, 0x5f, 0x43, 0x4f, 0x4e, 0x56, 0x45, 0x52, 0x54, 0x5f, 0x4c, 0x4f, 0x4e, 0x47, 0x56, + 0x41, 0x52, 0x43, 0x48, 0x41, 0x52, 0x10, 0x0b, 0x12, 0x17, 0x0a, 0x13, 0x53, 0x51, 0x4c, 0x5f, + 0x43, 0x4f, 0x4e, 0x56, 0x45, 0x52, 0x54, 0x5f, 0x4e, 0x55, 0x4d, 0x45, 0x52, 0x49, 0x43, 0x10, + 0x0c, 0x12, 0x14, 0x0a, 0x10, 0x53, 0x51, 0x4c, 0x5f, 0x43, 0x4f, 0x4e, 0x56, 0x45, 0x52, 0x54, + 0x5f, 0x52, 0x45, 0x41, 0x4c, 0x10, 0x0d, 0x12, 0x18, 0x0a, 0x14, 0x53, 0x51, 0x4c, 0x5f, 0x43, + 0x4f, 0x4e, 0x56, 0x45, 0x52, 0x54, 0x5f, 0x53, 0x4d, 0x41, 0x4c, 0x4c, 0x49, 0x4e, 0x54, 0x10, + 0x0e, 0x12, 0x14, 0x0a, 0x10, 0x53, 0x51, 0x4c, 0x5f, 0x43, 0x4f, 0x4e, 0x56, 0x45, 0x52, 0x54, + 0x5f, 0x54, 0x49, 0x4d, 0x45, 0x10, 0x0f, 0x12, 0x19, 0x0a, 0x15, 0x53, 0x51, 0x4c, 0x5f, 0x43, + 0x4f, 0x4e, 0x56, 0x45, 0x52, 0x54, 0x5f, 0x54, 0x49, 0x4d, 0x45, 0x53, 0x54, 0x41, 0x4d, 0x50, + 0x10, 0x10, 0x12, 0x17, 0x0a, 0x13, 0x53, 0x51, 0x4c, 0x5f, 0x43, 0x4f, 0x4e, 0x56, 0x45, 0x52, + 0x54, 0x5f, 0x54, 0x49, 0x4e, 0x59, 0x49, 0x4e, 0x54, 0x10, 0x11, 0x12, 0x19, 0x0a, 0x15, 0x53, + 0x51, 0x4c, 0x5f, 0x43, 0x4f, 0x4e, 0x56, 0x45, 0x52, 0x54, 0x5f, 0x56, 0x41, 0x52, 0x42, 0x49, + 0x4e, 0x41, 0x52, 0x59, 0x10, 0x12, 0x12, 0x17, 0x0a, 0x13, 0x53, 0x51, 0x4c, 0x5f, 0x43, 0x4f, + 0x4e, 0x56, 0x45, 0x52, 0x54, 0x5f, 0x56, 0x41, 0x52, 0x43, 0x48, 0x41, 0x52, 0x10, 0x13, 0x2a, + 0x8f, 0x04, 
0x0a, 0x0c, 0x58, 0x64, 0x62, 0x63, 0x44, 0x61, 0x74, 0x61, 0x54, 0x79, 0x70, 0x65, + 0x12, 0x15, 0x0a, 0x11, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, + 0x5f, 0x54, 0x59, 0x50, 0x45, 0x10, 0x00, 0x12, 0x0d, 0x0a, 0x09, 0x58, 0x44, 0x42, 0x43, 0x5f, + 0x43, 0x48, 0x41, 0x52, 0x10, 0x01, 0x12, 0x10, 0x0a, 0x0c, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x4e, + 0x55, 0x4d, 0x45, 0x52, 0x49, 0x43, 0x10, 0x02, 0x12, 0x10, 0x0a, 0x0c, 0x58, 0x44, 0x42, 0x43, + 0x5f, 0x44, 0x45, 0x43, 0x49, 0x4d, 0x41, 0x4c, 0x10, 0x03, 0x12, 0x10, 0x0a, 0x0c, 0x58, 0x44, + 0x42, 0x43, 0x5f, 0x49, 0x4e, 0x54, 0x45, 0x47, 0x45, 0x52, 0x10, 0x04, 0x12, 0x11, 0x0a, 0x0d, + 0x58, 0x44, 0x42, 0x43, 0x5f, 0x53, 0x4d, 0x41, 0x4c, 0x4c, 0x49, 0x4e, 0x54, 0x10, 0x05, 0x12, + 0x0e, 0x0a, 0x0a, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x46, 0x4c, 0x4f, 0x41, 0x54, 0x10, 0x06, 0x12, + 0x0d, 0x0a, 0x09, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x52, 0x45, 0x41, 0x4c, 0x10, 0x07, 0x12, 0x0f, + 0x0a, 0x0b, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x44, 0x4f, 0x55, 0x42, 0x4c, 0x45, 0x10, 0x08, 0x12, + 0x11, 0x0a, 0x0d, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x44, 0x41, 0x54, 0x45, 0x54, 0x49, 0x4d, 0x45, + 0x10, 0x09, 0x12, 0x11, 0x0a, 0x0d, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x49, 0x4e, 0x54, 0x45, 0x52, + 0x56, 0x41, 0x4c, 0x10, 0x0a, 0x12, 0x10, 0x0a, 0x0c, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x56, 0x41, + 0x52, 0x43, 0x48, 0x41, 0x52, 0x10, 0x0c, 0x12, 0x0d, 0x0a, 0x09, 0x58, 0x44, 0x42, 0x43, 0x5f, + 0x44, 0x41, 0x54, 0x45, 0x10, 0x5b, 0x12, 0x0d, 0x0a, 0x09, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x54, + 0x49, 0x4d, 0x45, 0x10, 0x5c, 0x12, 0x12, 0x0a, 0x0e, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x54, 0x49, + 0x4d, 0x45, 0x53, 0x54, 0x41, 0x4d, 0x50, 0x10, 0x5d, 0x12, 0x1d, 0x0a, 0x10, 0x58, 0x44, 0x42, + 0x43, 0x5f, 0x4c, 0x4f, 0x4e, 0x47, 0x56, 0x41, 0x52, 0x43, 0x48, 0x41, 0x52, 0x10, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x12, 0x18, 0x0a, 0x0b, 0x58, 0x44, 0x42, 0x43, + 0x5f, 0x42, 0x49, 0x4e, 0x41, 0x52, 0x59, 0x10, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x01, 0x12, 0x1b, 0x0a, 0x0e, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x56, 0x41, 0x52, 0x42, 0x49, + 0x4e, 0x41, 0x52, 0x59, 0x10, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x12, + 0x1f, 0x0a, 0x12, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x4c, 0x4f, 0x4e, 0x47, 0x56, 0x41, 0x52, 0x42, + 0x49, 0x4e, 0x41, 0x52, 0x59, 0x10, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, + 0x12, 0x18, 0x0a, 0x0b, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x42, 0x49, 0x47, 0x49, 0x4e, 0x54, 0x10, + 0xfb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x12, 0x19, 0x0a, 0x0c, 0x58, 0x44, + 0x42, 0x43, 0x5f, 0x54, 0x49, 0x4e, 0x59, 0x49, 0x4e, 0x54, 0x10, 0xfa, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x01, 0x12, 0x15, 0x0a, 0x08, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x42, 0x49, + 0x54, 0x10, 0xf9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x12, 0x17, 0x0a, 0x0a, + 0x58, 0x44, 0x42, 0x43, 0x5f, 0x57, 0x43, 0x48, 0x41, 0x52, 0x10, 0xf8, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x01, 0x12, 0x1a, 0x0a, 0x0d, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x57, 0x56, + 0x41, 0x52, 0x43, 0x48, 0x41, 0x52, 0x10, 0xf7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x01, 0x2a, 0xa3, 0x08, 0x0a, 0x13, 0x58, 0x64, 0x62, 0x63, 0x44, 0x61, 0x74, 0x65, 0x74, 0x69, + 0x6d, 0x65, 0x53, 0x75, 0x62, 0x63, 0x6f, 0x64, 0x65, 0x12, 0x18, 0x0a, 0x14, 0x58, 0x44, 0x42, + 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, + 0x4e, 0x10, 0x00, 0x12, 0x15, 0x0a, 
0x11, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, + 0x4f, 0x44, 0x45, 0x5f, 0x59, 0x45, 0x41, 0x52, 0x10, 0x01, 0x12, 0x15, 0x0a, 0x11, 0x58, 0x44, + 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x44, 0x41, 0x54, 0x45, 0x10, + 0x01, 0x12, 0x15, 0x0a, 0x11, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, + 0x45, 0x5f, 0x54, 0x49, 0x4d, 0x45, 0x10, 0x02, 0x12, 0x16, 0x0a, 0x12, 0x58, 0x44, 0x42, 0x43, + 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x4d, 0x4f, 0x4e, 0x54, 0x48, 0x10, 0x02, + 0x12, 0x1a, 0x0a, 0x16, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, + 0x5f, 0x54, 0x49, 0x4d, 0x45, 0x53, 0x54, 0x41, 0x4d, 0x50, 0x10, 0x03, 0x12, 0x14, 0x0a, 0x10, + 0x58, 0x44, 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x44, 0x41, 0x59, + 0x10, 0x03, 0x12, 0x23, 0x0a, 0x1f, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, + 0x44, 0x45, 0x5f, 0x54, 0x49, 0x4d, 0x45, 0x5f, 0x57, 0x49, 0x54, 0x48, 0x5f, 0x54, 0x49, 0x4d, + 0x45, 0x5a, 0x4f, 0x4e, 0x45, 0x10, 0x04, 0x12, 0x15, 0x0a, 0x11, 0x58, 0x44, 0x42, 0x43, 0x5f, + 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x48, 0x4f, 0x55, 0x52, 0x10, 0x04, 0x12, 0x28, + 0x0a, 0x24, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x54, + 0x49, 0x4d, 0x45, 0x53, 0x54, 0x41, 0x4d, 0x50, 0x5f, 0x57, 0x49, 0x54, 0x48, 0x5f, 0x54, 0x49, + 0x4d, 0x45, 0x5a, 0x4f, 0x4e, 0x45, 0x10, 0x05, 0x12, 0x17, 0x0a, 0x13, 0x58, 0x44, 0x42, 0x43, + 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x4d, 0x49, 0x4e, 0x55, 0x54, 0x45, 0x10, + 0x05, 0x12, 0x17, 0x0a, 0x13, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, + 0x45, 0x5f, 0x53, 0x45, 0x43, 0x4f, 0x4e, 0x44, 0x10, 0x06, 0x12, 0x1e, 0x0a, 0x1a, 0x58, 0x44, + 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x59, 0x45, 0x41, 0x52, 0x5f, + 0x54, 0x4f, 0x5f, 0x4d, 0x4f, 0x4e, 0x54, 0x48, 0x10, 0x07, 0x12, 0x1c, 0x0a, 0x18, 0x58, 0x44, + 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x44, 0x41, 0x59, 0x5f, 0x54, + 0x4f, 0x5f, 0x48, 0x4f, 0x55, 0x52, 0x10, 0x08, 0x12, 0x1e, 0x0a, 0x1a, 0x58, 0x44, 0x42, 0x43, + 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x44, 0x41, 0x59, 0x5f, 0x54, 0x4f, 0x5f, + 0x4d, 0x49, 0x4e, 0x55, 0x54, 0x45, 0x10, 0x09, 0x12, 0x1e, 0x0a, 0x1a, 0x58, 0x44, 0x42, 0x43, + 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x44, 0x41, 0x59, 0x5f, 0x54, 0x4f, 0x5f, + 0x53, 0x45, 0x43, 0x4f, 0x4e, 0x44, 0x10, 0x0a, 0x12, 0x1f, 0x0a, 0x1b, 0x58, 0x44, 0x42, 0x43, + 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x48, 0x4f, 0x55, 0x52, 0x5f, 0x54, 0x4f, + 0x5f, 0x4d, 0x49, 0x4e, 0x55, 0x54, 0x45, 0x10, 0x0b, 0x12, 0x1f, 0x0a, 0x1b, 0x58, 0x44, 0x42, + 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x48, 0x4f, 0x55, 0x52, 0x5f, 0x54, + 0x4f, 0x5f, 0x53, 0x45, 0x43, 0x4f, 0x4e, 0x44, 0x10, 0x0c, 0x12, 0x21, 0x0a, 0x1d, 0x58, 0x44, + 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x4d, 0x49, 0x4e, 0x55, 0x54, + 0x45, 0x5f, 0x54, 0x4f, 0x5f, 0x53, 0x45, 0x43, 0x4f, 0x4e, 0x44, 0x10, 0x0d, 0x12, 0x1e, 0x0a, + 0x1a, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x49, 0x4e, + 0x54, 0x45, 0x52, 0x56, 0x41, 0x4c, 0x5f, 0x59, 0x45, 0x41, 0x52, 0x10, 0x65, 0x12, 0x1f, 0x0a, + 0x1b, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x49, 0x4e, + 0x54, 0x45, 0x52, 0x56, 0x41, 0x4c, 0x5f, 0x4d, 0x4f, 0x4e, 
0x54, 0x48, 0x10, 0x66, 0x12, 0x1d, + 0x0a, 0x19, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x49, + 0x4e, 0x54, 0x45, 0x52, 0x56, 0x41, 0x4c, 0x5f, 0x44, 0x41, 0x59, 0x10, 0x67, 0x12, 0x1e, 0x0a, + 0x1a, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x49, 0x4e, + 0x54, 0x45, 0x52, 0x56, 0x41, 0x4c, 0x5f, 0x48, 0x4f, 0x55, 0x52, 0x10, 0x68, 0x12, 0x20, 0x0a, + 0x1c, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x49, 0x4e, + 0x54, 0x45, 0x52, 0x56, 0x41, 0x4c, 0x5f, 0x4d, 0x49, 0x4e, 0x55, 0x54, 0x45, 0x10, 0x69, 0x12, + 0x20, 0x0a, 0x1c, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, + 0x49, 0x4e, 0x54, 0x45, 0x52, 0x56, 0x41, 0x4c, 0x5f, 0x53, 0x45, 0x43, 0x4f, 0x4e, 0x44, 0x10, + 0x6a, 0x12, 0x27, 0x0a, 0x23, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, + 0x45, 0x5f, 0x49, 0x4e, 0x54, 0x45, 0x52, 0x56, 0x41, 0x4c, 0x5f, 0x59, 0x45, 0x41, 0x52, 0x5f, + 0x54, 0x4f, 0x5f, 0x4d, 0x4f, 0x4e, 0x54, 0x48, 0x10, 0x6b, 0x12, 0x25, 0x0a, 0x21, 0x58, 0x44, + 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x49, 0x4e, 0x54, 0x45, 0x52, + 0x56, 0x41, 0x4c, 0x5f, 0x44, 0x41, 0x59, 0x5f, 0x54, 0x4f, 0x5f, 0x48, 0x4f, 0x55, 0x52, 0x10, + 0x6c, 0x12, 0x27, 0x0a, 0x23, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, + 0x45, 0x5f, 0x49, 0x4e, 0x54, 0x45, 0x52, 0x56, 0x41, 0x4c, 0x5f, 0x44, 0x41, 0x59, 0x5f, 0x54, + 0x4f, 0x5f, 0x4d, 0x49, 0x4e, 0x55, 0x54, 0x45, 0x10, 0x6d, 0x12, 0x27, 0x0a, 0x23, 0x58, 0x44, + 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x49, 0x4e, 0x54, 0x45, 0x52, + 0x56, 0x41, 0x4c, 0x5f, 0x44, 0x41, 0x59, 0x5f, 0x54, 0x4f, 0x5f, 0x53, 0x45, 0x43, 0x4f, 0x4e, + 0x44, 0x10, 0x6e, 0x12, 0x28, 0x0a, 0x24, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, + 0x4f, 0x44, 0x45, 0x5f, 0x49, 0x4e, 0x54, 0x45, 0x52, 0x56, 0x41, 0x4c, 0x5f, 0x48, 0x4f, 0x55, + 0x52, 0x5f, 0x54, 0x4f, 0x5f, 0x4d, 0x49, 0x4e, 0x55, 0x54, 0x45, 0x10, 0x6f, 0x12, 0x28, 0x0a, + 0x24, 0x58, 0x44, 0x42, 0x43, 0x5f, 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x49, 0x4e, + 0x54, 0x45, 0x52, 0x56, 0x41, 0x4c, 0x5f, 0x48, 0x4f, 0x55, 0x52, 0x5f, 0x54, 0x4f, 0x5f, 0x53, + 0x45, 0x43, 0x4f, 0x4e, 0x44, 0x10, 0x70, 0x12, 0x2a, 0x0a, 0x26, 0x58, 0x44, 0x42, 0x43, 0x5f, + 0x53, 0x55, 0x42, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x49, 0x4e, 0x54, 0x45, 0x52, 0x56, 0x41, 0x4c, + 0x5f, 0x4d, 0x49, 0x4e, 0x55, 0x54, 0x45, 0x5f, 0x54, 0x4f, 0x5f, 0x53, 0x45, 0x43, 0x4f, 0x4e, + 0x44, 0x10, 0x71, 0x1a, 0x02, 0x10, 0x01, 0x2a, 0x57, 0x0a, 0x08, 0x4e, 0x75, 0x6c, 0x6c, 0x61, + 0x62, 0x6c, 0x65, 0x12, 0x18, 0x0a, 0x14, 0x4e, 0x55, 0x4c, 0x4c, 0x41, 0x42, 0x49, 0x4c, 0x49, + 0x54, 0x59, 0x5f, 0x4e, 0x4f, 0x5f, 0x4e, 0x55, 0x4c, 0x4c, 0x53, 0x10, 0x00, 0x12, 0x18, 0x0a, + 0x14, 0x4e, 0x55, 0x4c, 0x4c, 0x41, 0x42, 0x49, 0x4c, 0x49, 0x54, 0x59, 0x5f, 0x4e, 0x55, 0x4c, + 0x4c, 0x41, 0x42, 0x4c, 0x45, 0x10, 0x01, 0x12, 0x17, 0x0a, 0x13, 0x4e, 0x55, 0x4c, 0x4c, 0x41, + 0x42, 0x49, 0x4c, 0x49, 0x54, 0x59, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x02, + 0x2a, 0x61, 0x0a, 0x0a, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x61, 0x62, 0x6c, 0x65, 0x12, 0x13, + 0x0a, 0x0f, 0x53, 0x45, 0x41, 0x52, 0x43, 0x48, 0x41, 0x42, 0x4c, 0x45, 0x5f, 0x4e, 0x4f, 0x4e, + 0x45, 0x10, 0x00, 0x12, 0x13, 0x0a, 0x0f, 0x53, 0x45, 0x41, 0x52, 0x43, 0x48, 0x41, 0x42, 0x4c, + 0x45, 0x5f, 0x43, 0x48, 0x41, 0x52, 0x10, 0x01, 0x12, 0x14, 0x0a, 0x10, 0x53, 0x45, 
0x41, 0x52, + 0x43, 0x48, 0x41, 0x42, 0x4c, 0x45, 0x5f, 0x42, 0x41, 0x53, 0x49, 0x43, 0x10, 0x02, 0x12, 0x13, + 0x0a, 0x0f, 0x53, 0x45, 0x41, 0x52, 0x43, 0x48, 0x41, 0x42, 0x4c, 0x45, 0x5f, 0x46, 0x55, 0x4c, + 0x4c, 0x10, 0x03, 0x2a, 0x5c, 0x0a, 0x11, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x44, 0x65, 0x6c, + 0x65, 0x74, 0x65, 0x52, 0x75, 0x6c, 0x65, 0x73, 0x12, 0x0b, 0x0a, 0x07, 0x43, 0x41, 0x53, 0x43, + 0x41, 0x44, 0x45, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x52, 0x45, 0x53, 0x54, 0x52, 0x49, 0x43, + 0x54, 0x10, 0x01, 0x12, 0x0c, 0x0a, 0x08, 0x53, 0x45, 0x54, 0x5f, 0x4e, 0x55, 0x4c, 0x4c, 0x10, + 0x02, 0x12, 0x0d, 0x0a, 0x09, 0x4e, 0x4f, 0x5f, 0x41, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x10, 0x03, + 0x12, 0x0f, 0x0a, 0x0b, 0x53, 0x45, 0x54, 0x5f, 0x44, 0x45, 0x46, 0x41, 0x55, 0x4c, 0x54, 0x10, + 0x04, 0x3a, 0x44, 0x0a, 0x0c, 0x65, 0x78, 0x70, 0x65, 0x72, 0x69, 0x6d, 0x65, 0x6e, 0x74, 0x61, + 0x6c, 0x12, 0x1f, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x62, 0x75, 0x66, 0x2e, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x18, 0xe8, 0x07, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x65, 0x78, 0x70, 0x65, 0x72, + 0x69, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x42, 0x5b, 0x0a, 0x20, 0x6f, 0x72, 0x67, 0x2e, 0x61, + 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2e, 0x66, 0x6c, 0x69, 0x67, + 0x68, 0x74, 0x2e, 0x73, 0x71, 0x6c, 0x2e, 0x69, 0x6d, 0x70, 0x6c, 0x5a, 0x37, 0x67, 0x69, 0x74, + 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2f, 0x61, + 0x72, 0x72, 0x6f, 0x77, 0x2f, 0x67, 0x6f, 0x2f, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x2f, 0x66, 0x6c, + 0x69, 0x67, 0x68, 0x74, 0x2f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x2f, 0x66, 0x6c, + 0x69, 0x67, 0x68, 0x74, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_FlightSql_proto_rawDescOnce sync.Once + file_FlightSql_proto_rawDescData = file_FlightSql_proto_rawDesc +) + +func file_FlightSql_proto_rawDescGZIP() []byte { + file_FlightSql_proto_rawDescOnce.Do(func() { + file_FlightSql_proto_rawDescData = protoimpl.X.CompressGZIP(file_FlightSql_proto_rawDescData) + }) + return file_FlightSql_proto_rawDescData +} + +var file_FlightSql_proto_enumTypes = make([]protoimpl.EnumInfo, 21) +var file_FlightSql_proto_msgTypes = make([]protoimpl.MessageInfo, 19) +var file_FlightSql_proto_goTypes = []interface{}{ + (SqlInfo)(0), // 0: arrow.flight.protocol.sql.SqlInfo + (SqlSupportedCaseSensitivity)(0), // 1: arrow.flight.protocol.sql.SqlSupportedCaseSensitivity + (SqlNullOrdering)(0), // 2: arrow.flight.protocol.sql.SqlNullOrdering + (SupportedSqlGrammar)(0), // 3: arrow.flight.protocol.sql.SupportedSqlGrammar + (SupportedAnsi92SqlGrammarLevel)(0), // 4: arrow.flight.protocol.sql.SupportedAnsi92SqlGrammarLevel + (SqlOuterJoinsSupportLevel)(0), // 5: arrow.flight.protocol.sql.SqlOuterJoinsSupportLevel + (SqlSupportedGroupBy)(0), // 6: arrow.flight.protocol.sql.SqlSupportedGroupBy + (SqlSupportedElementActions)(0), // 7: arrow.flight.protocol.sql.SqlSupportedElementActions + (SqlSupportedPositionedCommands)(0), // 8: arrow.flight.protocol.sql.SqlSupportedPositionedCommands + (SqlSupportedSubqueries)(0), // 9: arrow.flight.protocol.sql.SqlSupportedSubqueries + (SqlSupportedUnions)(0), // 10: arrow.flight.protocol.sql.SqlSupportedUnions + (SqlTransactionIsolationLevel)(0), // 11: arrow.flight.protocol.sql.SqlTransactionIsolationLevel + (SqlSupportedTransactions)(0), // 12: arrow.flight.protocol.sql.SqlSupportedTransactions + 
(SqlSupportedResultSetType)(0), // 13: arrow.flight.protocol.sql.SqlSupportedResultSetType + (SqlSupportedResultSetConcurrency)(0), // 14: arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency + (SqlSupportsConvert)(0), // 15: arrow.flight.protocol.sql.SqlSupportsConvert + (XdbcDataType)(0), // 16: arrow.flight.protocol.sql.XdbcDataType + (XdbcDatetimeSubcode)(0), // 17: arrow.flight.protocol.sql.XdbcDatetimeSubcode + (Nullable)(0), // 18: arrow.flight.protocol.sql.Nullable + (Searchable)(0), // 19: arrow.flight.protocol.sql.Searchable + (UpdateDeleteRules)(0), // 20: arrow.flight.protocol.sql.UpdateDeleteRules + (*CommandGetSqlInfo)(nil), // 21: arrow.flight.protocol.sql.CommandGetSqlInfo + (*CommandGetXdbcTypeInfo)(nil), // 22: arrow.flight.protocol.sql.CommandGetXdbcTypeInfo + (*CommandGetCatalogs)(nil), // 23: arrow.flight.protocol.sql.CommandGetCatalogs + (*CommandGetDbSchemas)(nil), // 24: arrow.flight.protocol.sql.CommandGetDbSchemas + (*CommandGetTables)(nil), // 25: arrow.flight.protocol.sql.CommandGetTables + (*CommandGetTableTypes)(nil), // 26: arrow.flight.protocol.sql.CommandGetTableTypes + (*CommandGetPrimaryKeys)(nil), // 27: arrow.flight.protocol.sql.CommandGetPrimaryKeys + (*CommandGetExportedKeys)(nil), // 28: arrow.flight.protocol.sql.CommandGetExportedKeys + (*CommandGetImportedKeys)(nil), // 29: arrow.flight.protocol.sql.CommandGetImportedKeys + (*CommandGetCrossReference)(nil), // 30: arrow.flight.protocol.sql.CommandGetCrossReference + (*ActionCreatePreparedStatementRequest)(nil), // 31: arrow.flight.protocol.sql.ActionCreatePreparedStatementRequest + (*ActionCreatePreparedStatementResult)(nil), // 32: arrow.flight.protocol.sql.ActionCreatePreparedStatementResult + (*ActionClosePreparedStatementRequest)(nil), // 33: arrow.flight.protocol.sql.ActionClosePreparedStatementRequest + (*CommandStatementQuery)(nil), // 34: arrow.flight.protocol.sql.CommandStatementQuery + (*TicketStatementQuery)(nil), // 35: arrow.flight.protocol.sql.TicketStatementQuery + (*CommandPreparedStatementQuery)(nil), // 36: arrow.flight.protocol.sql.CommandPreparedStatementQuery + (*CommandStatementUpdate)(nil), // 37: arrow.flight.protocol.sql.CommandStatementUpdate + (*CommandPreparedStatementUpdate)(nil), // 38: arrow.flight.protocol.sql.CommandPreparedStatementUpdate + (*DoPutUpdateResult)(nil), // 39: arrow.flight.protocol.sql.DoPutUpdateResult + (*descriptor.MessageOptions)(nil), // 40: google.protobuf.MessageOptions +} +var file_FlightSql_proto_depIdxs = []int32{ + 40, // 0: arrow.flight.protocol.sql.experimental:extendee -> google.protobuf.MessageOptions + 1, // [1:1] is the sub-list for method output_type + 1, // [1:1] is the sub-list for method input_type + 1, // [1:1] is the sub-list for extension type_name + 0, // [0:1] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_FlightSql_proto_init() } +func file_FlightSql_proto_init() { + if File_FlightSql_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_FlightSql_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CommandGetSqlInfo); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_FlightSql_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CommandGetXdbcTypeInfo); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } 
+ file_FlightSql_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CommandGetCatalogs); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_FlightSql_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CommandGetDbSchemas); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_FlightSql_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CommandGetTables); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_FlightSql_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CommandGetTableTypes); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_FlightSql_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CommandGetPrimaryKeys); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_FlightSql_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CommandGetExportedKeys); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_FlightSql_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CommandGetImportedKeys); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_FlightSql_proto_msgTypes[9].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CommandGetCrossReference); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_FlightSql_proto_msgTypes[10].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ActionCreatePreparedStatementRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_FlightSql_proto_msgTypes[11].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ActionCreatePreparedStatementResult); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_FlightSql_proto_msgTypes[12].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ActionClosePreparedStatementRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_FlightSql_proto_msgTypes[13].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CommandStatementQuery); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_FlightSql_proto_msgTypes[14].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*TicketStatementQuery); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_FlightSql_proto_msgTypes[15].Exporter = func(v interface{}, i int) interface{} { + switch v := 
v.(*CommandPreparedStatementQuery); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_FlightSql_proto_msgTypes[16].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CommandStatementUpdate); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_FlightSql_proto_msgTypes[17].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CommandPreparedStatementUpdate); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_FlightSql_proto_msgTypes[18].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*DoPutUpdateResult); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + file_FlightSql_proto_msgTypes[1].OneofWrappers = []interface{}{} + file_FlightSql_proto_msgTypes[3].OneofWrappers = []interface{}{} + file_FlightSql_proto_msgTypes[4].OneofWrappers = []interface{}{} + file_FlightSql_proto_msgTypes[6].OneofWrappers = []interface{}{} + file_FlightSql_proto_msgTypes[7].OneofWrappers = []interface{}{} + file_FlightSql_proto_msgTypes[8].OneofWrappers = []interface{}{} + file_FlightSql_proto_msgTypes[9].OneofWrappers = []interface{}{} + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_FlightSql_proto_rawDesc, + NumEnums: 21, + NumMessages: 19, + NumExtensions: 1, + NumServices: 0, + }, + GoTypes: file_FlightSql_proto_goTypes, + DependencyIndexes: file_FlightSql_proto_depIdxs, + EnumInfos: file_FlightSql_proto_enumTypes, + MessageInfos: file_FlightSql_proto_msgTypes, + ExtensionInfos: file_FlightSql_proto_extTypes, + }.Build() + File_FlightSql_proto = out.File + file_FlightSql_proto_rawDesc = nil + file_FlightSql_proto_goTypes = nil + file_FlightSql_proto_depIdxs = nil +} diff --git a/go/arrow/flight/record_batch_reader.go b/go/arrow/flight/record_batch_reader.go index f2e02ce6fe451..035ba9c4bbeff 100644 --- a/go/arrow/flight/record_batch_reader.go +++ b/go/arrow/flight/record_batch_reader.go @@ -21,10 +21,12 @@ import ( "fmt" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/ipc" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/arrio" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/ipc" + "github.com/apache/arrow/go/v10/arrow/memory" ) // DataStreamReader is an interface for receiving flight data messages on a stream @@ -36,6 +38,7 @@ type DataStreamReader interface { type dataMessageReader struct { rdr DataStreamReader + peeked *FlightData refCount int64 msg *ipc.Message @@ -44,7 +47,18 @@ type dataMessageReader struct { } func (d *dataMessageReader) Message() (*ipc.Message, error) { - fd, err := d.rdr.Recv() + var ( + fd *FlightData + err error + ) + + if d.peeked != nil { + fd = d.peeked + d.peeked = nil + } else { + fd, err = d.rdr.Recv() + } + if err != nil { if d.msg != nil { // clear the previous message in the error case @@ -117,12 +131,35 @@ func (r *Reader) LatestFlightDescriptor() *FlightDescriptor { return r.dmr.descr } +// Chunk is a 
convenience function to return a chunk of the flight stream +// returning the RecordBatch along with the FlightDescriptor and any AppMetadata. +// Each of these can be retrieved separately with their respective functions, +// this is just a convenience to retrieve all three with one function call. +func (r *Reader) Chunk() StreamChunk { + return StreamChunk{ + Data: r.Record(), + Desc: r.dmr.descr, + AppMetadata: r.dmr.lastAppMetadata, + } +} + // NewRecordReader constructs an ipc reader using the flight data stream reader // as the source of the ipc messages, opts passed will be passed to the underlying // ipc.Reader such as ipc.WithSchema and ipc.WithAllocator func NewRecordReader(r DataStreamReader, opts ...ipc.Option) (*Reader, error) { - rdr := &Reader{dmr: &dataMessageReader{rdr: r}} - var err error + // peek the first message for a descriptor + data, err := r.Recv() + if err != nil { + return nil, err + } + + rdr := &Reader{dmr: &dataMessageReader{rdr: r, refCount: 1}} + rdr.dmr.descr = data.FlightDescriptor + if len(data.DataHeader) > 0 { + rdr.dmr.peeked = data + } + + rdr.dmr.Retain() if rdr.Reader, err = ipc.NewReaderFromMessageReader(rdr.dmr, opts...); err != nil { return nil, fmt.Errorf("arrow/flight: could not create flight reader: %w", err) } @@ -144,3 +181,56 @@ func DeserializeSchema(info []byte, mem memory.Allocator) (*arrow.Schema, error) defer rdr.Release() return rdr.Schema(), nil } + +// StreamChunk represents a single chunk of a FlightData stream +type StreamChunk struct { + Data arrow.Record + Desc *FlightDescriptor + AppMetadata []byte + Err error +} + +// MessageReader is an interface representing a RecordReader +// that also provides StreamChunks and/or the ability to retrieve +// FlightDescriptors and AppMetadata from the flight stream +type MessageReader interface { + array.RecordReader + arrio.Reader + Err() error + Chunk() StreamChunk + LatestFlightDescriptor() *FlightDescriptor + LatestAppMetadata() []byte +} + +type haserr interface { + Err() error +} + +// StreamChunksFromReader is a convenience function to populate a channel +// from a record reader. It is intended to be run using a separate goroutine +// by calling `go flight.StreamChunksFromReader(rdr, ch)`. +// +// If the record reader panics, an error chunk will get sent on the channel. +// +// This will close the channel and release the reader when it completes. 
+func StreamChunksFromReader(rdr array.RecordReader, ch chan<- StreamChunk) { + defer close(ch) + defer func() { + if err := recover(); err != nil { + ch <- StreamChunk{Err: fmt.Errorf("panic while reading: %s", err)} + } + }() + + defer rdr.Release() + for rdr.Next() { + rec := rdr.Record() + rec.Retain() + ch <- StreamChunk{Data: rec} + } + + if e, ok := rdr.(haserr); ok { + if e.Err() != nil { + ch <- StreamChunk{Err: e.Err()} + } + } +} diff --git a/go/arrow/flight/record_batch_writer.go b/go/arrow/flight/record_batch_writer.go index ab9d71e40bbd0..efa2773c4839a 100644 --- a/go/arrow/flight/record_batch_writer.go +++ b/go/arrow/flight/record_batch_writer.go @@ -19,9 +19,9 @@ package flight import ( "bytes" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/ipc" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/ipc" + "github.com/apache/arrow/go/v10/arrow/memory" ) // DataStreamWriter is an interface that represents an Arrow Flight stream @@ -60,6 +60,12 @@ type Writer struct { pw *flightPayloadWriter } +// WriteMetadata writes a payload message to the stream containing only +// the specified app metadata. +func (w *Writer) WriteMetadata(appMetadata []byte) error { + return w.pw.w.Send(&FlightData{AppMetadata: appMetadata}) +} + // SetFlightDescriptor sets the flight descriptor into the next payload that will // be written by the flight writer. It will only be put into the very next payload // and afterwards the writer will no longer keep it's pointer to the descriptor. @@ -107,3 +113,7 @@ func SerializeSchema(rec *arrow.Schema, mem memory.Allocator) []byte { w.Close() return buf.Bytes() } + +type MetadataWriter interface { + WriteMetadata([]byte) error +} diff --git a/go/arrow/flight/server.go b/go/arrow/flight/server.go index 919c79b77639e..f10165ddbbe25 100644 --- a/go/arrow/flight/server.go +++ b/go/arrow/flight/server.go @@ -22,7 +22,7 @@ import ( "os" "os/signal" - "github.com/apache/arrow/go/v9/arrow/flight/internal/flight" + "github.com/apache/arrow/go/v10/arrow/flight/internal/flight" "google.golang.org/grpc" ) diff --git a/go/arrow/internal/arrdata/arrdata.go b/go/arrow/internal/arrdata/arrdata.go index 0c270f2faf078..5b6fd83082fc7 100644 --- a/go/arrow/internal/arrdata/arrdata.go +++ b/go/arrow/internal/arrdata/arrdata.go @@ -21,13 +21,13 @@ import ( "fmt" "sort" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/decimal128" - "github.com/apache/arrow/go/v9/arrow/float16" - "github.com/apache/arrow/go/v9/arrow/internal/testing/types" - "github.com/apache/arrow/go/v9/arrow/ipc" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/float16" + "github.com/apache/arrow/go/v10/arrow/internal/testing/types" + "github.com/apache/arrow/go/v10/arrow/ipc" + "github.com/apache/arrow/go/v10/arrow/memory" ) var ( @@ -49,6 +49,7 @@ func init() { Records["decimal128"] = makeDecimal128sRecords() Records["maps"] = makeMapsRecords() Records["extension"] = makeExtensionRecords() + Records["union"] = makeUnionRecords() for k := range Records { RecordNames = append(RecordNames, k) @@ -66,19 +67,19 @@ func makeNullRecords() []arrow.Record { schema := arrow.NewSchema( []arrow.Field{ - arrow.Field{Name: "nulls", Type: arrow.Null, Nullable: true}, + 
{Name: "nulls", Type: arrow.Null, Nullable: true}, }, &meta, ) mask := []bool{true, false, false, true, true} chunks := [][]arrow.Array{ - []arrow.Array{ + { arrayOf(mem, []nullT{null, null, null, null, null}, mask), }, - []arrow.Array{ + { arrayOf(mem, []nullT{null, null, null, null, null}, mask), }, - []arrow.Array{ + { arrayOf(mem, []nullT{null, null, null, null, null}, mask), }, } @@ -109,23 +110,23 @@ func makePrimitiveRecords() []arrow.Record { schema := arrow.NewSchema( []arrow.Field{ - arrow.Field{Name: "bools", Type: arrow.FixedWidthTypes.Boolean, Nullable: true}, - arrow.Field{Name: "int8s", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - arrow.Field{Name: "int16s", Type: arrow.PrimitiveTypes.Int16, Nullable: true}, - arrow.Field{Name: "int32s", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, - arrow.Field{Name: "int64s", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - arrow.Field{Name: "uint8s", Type: arrow.PrimitiveTypes.Uint8, Nullable: true}, - arrow.Field{Name: "uint16s", Type: arrow.PrimitiveTypes.Uint16, Nullable: true}, - arrow.Field{Name: "uint32s", Type: arrow.PrimitiveTypes.Uint32, Nullable: true}, - arrow.Field{Name: "uint64s", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, - arrow.Field{Name: "float32s", Type: arrow.PrimitiveTypes.Float32, Nullable: true}, - arrow.Field{Name: "float64s", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, + {Name: "bools", Type: arrow.FixedWidthTypes.Boolean, Nullable: true}, + {Name: "int8s", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, + {Name: "int16s", Type: arrow.PrimitiveTypes.Int16, Nullable: true}, + {Name: "int32s", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, + {Name: "int64s", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, + {Name: "uint8s", Type: arrow.PrimitiveTypes.Uint8, Nullable: true}, + {Name: "uint16s", Type: arrow.PrimitiveTypes.Uint16, Nullable: true}, + {Name: "uint32s", Type: arrow.PrimitiveTypes.Uint32, Nullable: true}, + {Name: "uint64s", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, + {Name: "float32s", Type: arrow.PrimitiveTypes.Float32, Nullable: true}, + {Name: "float64s", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, }, &meta, ) mask := []bool{true, false, false, true, true} chunks := [][]arrow.Array{ - []arrow.Array{ + { arrayOf(mem, []bool{true, false, true, false, true}, mask), arrayOf(mem, []int8{-1, -2, -3, -4, -5}, mask), arrayOf(mem, []int16{-1, -2, -3, -4, -5}, mask), @@ -138,7 +139,7 @@ func makePrimitiveRecords() []arrow.Record { arrayOf(mem, []float32{+1, +2, +3, +4, +5}, mask), arrayOf(mem, []float64{+1, +2, +3, +4, +5}, mask), }, - []arrow.Array{ + { arrayOf(mem, []bool{true, false, true, false, true}, mask), arrayOf(mem, []int8{-11, -12, -13, -14, -15}, mask), arrayOf(mem, []int16{-11, -12, -13, -14, -15}, mask), @@ -151,7 +152,7 @@ func makePrimitiveRecords() []arrow.Record { arrayOf(mem, []float32{+11, +12, +13, +14, +15}, mask), arrayOf(mem, []float64{+11, +12, +13, +14, +15}, mask), }, - []arrow.Array{ + { arrayOf(mem, []bool{true, false, true, false, true}, mask), arrayOf(mem, []int8{-21, -22, -23, -24, -25}, mask), arrayOf(mem, []int16{-21, -22, -23, -24, -25}, mask), @@ -194,49 +195,49 @@ func makeStructsRecords() []arrow.Record { mask := []bool{true, false, false, true, true, true, false, true} chunks := [][]arrow.Array{ - []arrow.Array{ + { structOf(mem, dtype, [][]arrow.Array{ - []arrow.Array{ + { arrayOf(mem, []int32{-1, -2, -3, -4, -5}, mask[:5]), arrayOf(mem, []string{"111", "222", "333", "444", "555"}, mask[:5]), }, - 
[]arrow.Array{ + { arrayOf(mem, []int32{-11, -12, -13, -14, -15}, mask[:5]), arrayOf(mem, []string{"1111", "1222", "1333", "1444", "1555"}, mask[:5]), }, - []arrow.Array{ + { arrayOf(mem, []int32{-21, -22, -23, -24, -25}, mask[:5]), arrayOf(mem, []string{"2111", "2222", "2333", "2444", "2555"}, mask[:5]), }, - []arrow.Array{ + { arrayOf(mem, []int32{-31, -32, -33, -34, -35}, mask[:5]), arrayOf(mem, []string{"3111", "3222", "3333", "3444", "3555"}, mask[:5]), }, - []arrow.Array{ + { arrayOf(mem, []int32{-41, -42, -43, -44, -45}, mask[:5]), arrayOf(mem, []string{"4111", "4222", "4333", "4444", "4555"}, mask[:5]), }, }, []bool{true, false, true, true, true}), }, - []arrow.Array{ + { structOf(mem, dtype, [][]arrow.Array{ - []arrow.Array{ + { arrayOf(mem, []int32{1, 2, 3, 4, 5}, mask[:5]), arrayOf(mem, []string{"-111", "-222", "-333", "-444", "-555"}, mask[:5]), }, - []arrow.Array{ + { arrayOf(mem, []int32{11, 12, 13, 14, 15}, mask[:5]), arrayOf(mem, []string{"-1111", "-1222", "-1333", "-1444", "-1555"}, mask[:5]), }, - []arrow.Array{ + { arrayOf(mem, []int32{21, 22, 23, 24, 25}, mask[:5]), arrayOf(mem, []string{"-2111", "-2222", "-2333", "-2444", "-2555"}, mask[:5]), }, - []arrow.Array{ + { arrayOf(mem, []int32{31, 32, 33, 34, 35}, mask[:5]), arrayOf(mem, []string{"-3111", "-3222", "-3333", "-3444", "-3555"}, mask[:5]), }, - []arrow.Array{ + { arrayOf(mem, []int32{41, 42, 43, 44, 45}, mask[:5]), arrayOf(mem, []string{"-4111", "-4222", "-4333", "-4444", "-4555"}, mask[:5]), }, @@ -270,28 +271,28 @@ func makeListsRecords() []arrow.Record { mask := []bool{true, false, false, true, true} chunks := [][]arrow.Array{ - []arrow.Array{ + { listOf(mem, []arrow.Array{ arrayOf(mem, []int32{1, 2, 3, 4, 5}, mask), arrayOf(mem, []int32{11, 12, 13, 14, 15}, mask), arrayOf(mem, []int32{21, 22, 23, 24, 25}, mask), }, nil), }, - []arrow.Array{ + { listOf(mem, []arrow.Array{ arrayOf(mem, []int32{-1, -2, -3, -4, -5}, mask), arrayOf(mem, []int32{-11, -12, -13, -14, -15}, mask), arrayOf(mem, []int32{-21, -22, -23, -24, -25}, mask), }, nil), }, - []arrow.Array{ + { listOf(mem, []arrow.Array{ arrayOf(mem, []int32{-1, -2, -3, -4, -5}, mask), arrayOf(mem, []int32{-11, -12, -13, -14, -15}, mask), arrayOf(mem, []int32{-21, -22, -23, -24, -25}, mask), }, []bool{true, false, true}), }, - []arrow.Array{ + { func() arrow.Array { bldr := array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32) defer bldr.Release() @@ -328,21 +329,21 @@ func makeFixedSizeListsRecords() []arrow.Record { mask := []bool{true, false, true} chunks := [][]arrow.Array{ - []arrow.Array{ + { fixedSizeListOf(mem, N, []arrow.Array{ arrayOf(mem, []int32{1, 2, 3}, mask), arrayOf(mem, []int32{11, 12, 13}, mask), arrayOf(mem, []int32{21, 22, 23}, mask), }, nil), }, - []arrow.Array{ + { fixedSizeListOf(mem, N, []arrow.Array{ arrayOf(mem, []int32{-1, -2, -3}, mask), arrayOf(mem, []int32{-11, -12, -13}, mask), arrayOf(mem, []int32{-21, -22, -23}, mask), }, nil), }, - []arrow.Array{ + { fixedSizeListOf(mem, N, []arrow.Array{ arrayOf(mem, []int32{-1, -2, -3}, mask), arrayOf(mem, []int32{-11, -12, -13}, mask), @@ -376,15 +377,15 @@ func makeStringsRecords() []arrow.Record { mask := []bool{true, false, false, true, true} chunks := [][]arrow.Array{ - []arrow.Array{ + { arrayOf(mem, []string{"1é", "2", "3", "4", "5"}, mask), arrayOf(mem, [][]byte{[]byte("1é"), []byte("2"), []byte("3"), []byte("4"), []byte("5")}, mask), }, - []arrow.Array{ + { arrayOf(mem, []string{"11", "22", "33", "44", "55"}, mask), arrayOf(mem, [][]byte{[]byte("11"), []byte("22"), []byte("33"), 
[]byte("44"), []byte("55")}, mask), }, - []arrow.Array{ + { arrayOf(mem, []string{"111", "222", "333", "444", "555"}, mask), arrayOf(mem, [][]byte{[]byte("111"), []byte("222"), []byte("333"), []byte("444"), []byte("555")}, mask), }, @@ -426,17 +427,17 @@ func makeFixedWidthTypesRecords() []arrow.Record { mem := memory.NewGoAllocator() schema := arrow.NewSchema( []arrow.Field{ - arrow.Field{Name: "float16s", Type: arrow.FixedWidthTypes.Float16, Nullable: true}, - arrow.Field{Name: "time32ms", Type: arrow.FixedWidthTypes.Time32ms, Nullable: true}, - arrow.Field{Name: "time32s", Type: arrow.FixedWidthTypes.Time32s, Nullable: true}, - arrow.Field{Name: "time64ns", Type: arrow.FixedWidthTypes.Time64ns, Nullable: true}, - arrow.Field{Name: "time64us", Type: arrow.FixedWidthTypes.Time64us, Nullable: true}, - arrow.Field{Name: "timestamp_s", Type: arrow.FixedWidthTypes.Timestamp_s, Nullable: true}, - arrow.Field{Name: "timestamp_ms", Type: arrow.FixedWidthTypes.Timestamp_ms, Nullable: true}, - arrow.Field{Name: "timestamp_us", Type: arrow.FixedWidthTypes.Timestamp_us, Nullable: true}, - arrow.Field{Name: "timestamp_ns", Type: arrow.FixedWidthTypes.Timestamp_ns, Nullable: true}, - arrow.Field{Name: "date32s", Type: arrow.FixedWidthTypes.Date32, Nullable: true}, - arrow.Field{Name: "date64s", Type: arrow.FixedWidthTypes.Date64, Nullable: true}, + {Name: "float16s", Type: arrow.FixedWidthTypes.Float16, Nullable: true}, + {Name: "time32ms", Type: arrow.FixedWidthTypes.Time32ms, Nullable: true}, + {Name: "time32s", Type: arrow.FixedWidthTypes.Time32s, Nullable: true}, + {Name: "time64ns", Type: arrow.FixedWidthTypes.Time64ns, Nullable: true}, + {Name: "time64us", Type: arrow.FixedWidthTypes.Time64us, Nullable: true}, + {Name: "timestamp_s", Type: arrow.FixedWidthTypes.Timestamp_s, Nullable: true}, + {Name: "timestamp_ms", Type: arrow.FixedWidthTypes.Timestamp_ms, Nullable: true}, + {Name: "timestamp_us", Type: arrow.FixedWidthTypes.Timestamp_us, Nullable: true}, + {Name: "timestamp_ns", Type: arrow.FixedWidthTypes.Timestamp_ns, Nullable: true}, + {Name: "date32s", Type: arrow.FixedWidthTypes.Date32, Nullable: true}, + {Name: "date64s", Type: arrow.FixedWidthTypes.Date64, Nullable: true}, }, nil, ) @@ -450,7 +451,7 @@ func makeFixedWidthTypesRecords() []arrow.Record { mask := []bool{true, false, false, true, true} chunks := [][]arrow.Array{ - []arrow.Array{ + { arrayOf(mem, float16s([]float32{+1, +2, +3, +4, +5}), mask), arrayOf(mem, []time32ms{-2, -1, 0, +1, +2}, mask), arrayOf(mem, []time32s{-2, -1, 0, +1, +2}, mask), @@ -463,7 +464,7 @@ func makeFixedWidthTypesRecords() []arrow.Record { arrayOf(mem, []arrow.Date32{-2, -1, 0, +1, +2}, mask), arrayOf(mem, []arrow.Date64{-2, -1, 0, +1, +2}, mask), }, - []arrow.Array{ + { arrayOf(mem, float16s([]float32{+11, +12, +13, +14, +15}), mask), arrayOf(mem, []time32ms{-12, -11, 10, +11, +12}, mask), arrayOf(mem, []time32s{-12, -11, 10, +11, +12}, mask), @@ -476,7 +477,7 @@ func makeFixedWidthTypesRecords() []arrow.Record { arrayOf(mem, []arrow.Date32{-12, -11, 10, +11, +12}, mask), arrayOf(mem, []arrow.Date64{-12, -11, 10, +11, +12}, mask), }, - []arrow.Array{ + { arrayOf(mem, float16s([]float32{+21, +22, +23, +24, +25}), mask), arrayOf(mem, []time32ms{-22, -21, 20, +21, +22}, mask), arrayOf(mem, []time32s{-22, -21, 20, +21, +22}, mask), @@ -513,19 +514,19 @@ func makeFixedSizeBinariesRecords() []arrow.Record { mem := memory.NewGoAllocator() schema := arrow.NewSchema( []arrow.Field{ - arrow.Field{Name: "fixed_size_binary_3", Type: 
&arrow.FixedSizeBinaryType{ByteWidth: 3}, Nullable: true}, + {Name: "fixed_size_binary_3", Type: &arrow.FixedSizeBinaryType{ByteWidth: 3}, Nullable: true}, }, nil, ) mask := []bool{true, false, false, true, true} chunks := [][]arrow.Array{ - []arrow.Array{ + { arrayOf(mem, []fsb3{"001", "002", "003", "004", "005"}, mask), }, - []arrow.Array{ + { arrayOf(mem, []fsb3{"011", "012", "013", "014", "015"}, mask), }, - []arrow.Array{ + { arrayOf(mem, []fsb3{"021", "022", "023", "024", "025"}, mask), }, } @@ -551,28 +552,63 @@ func makeIntervalsRecords() []arrow.Record { schema := arrow.NewSchema( []arrow.Field{ - arrow.Field{Name: "months", Type: arrow.FixedWidthTypes.MonthInterval, Nullable: true}, - arrow.Field{Name: "days", Type: arrow.FixedWidthTypes.DayTimeInterval, Nullable: true}, - arrow.Field{Name: "nanos", Type: arrow.FixedWidthTypes.MonthDayNanoInterval, Nullable: true}, + {Name: "months", Type: arrow.FixedWidthTypes.MonthInterval, Nullable: true}, + {Name: "days", Type: arrow.FixedWidthTypes.DayTimeInterval, Nullable: true}, + {Name: "nanos", Type: arrow.FixedWidthTypes.MonthDayNanoInterval, Nullable: true}, }, nil, ) mask := []bool{true, false, false, true, true} chunks := [][]arrow.Array{ - []arrow.Array{ + { arrayOf(mem, []arrow.MonthInterval{1, 2, 3, 4, 5}, mask), - arrayOf(mem, []arrow.DayTimeInterval{{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}}, mask), - arrayOf(mem, []arrow.MonthDayNanoInterval{{1, 1, 1000}, {2, 2, 2000}, {3, 3, 3000}, {4, 4, 4000}, {5, 5, 5000}}, mask), + arrayOf(mem, []arrow.DayTimeInterval{ + {Days: 1, Milliseconds: 1}, + {Days: 2, Milliseconds: 2}, + {Days: 3, Milliseconds: 3}, + {Days: 4, Milliseconds: 4}, + {Days: 5, Milliseconds: 5}}, + mask), + arrayOf(mem, []arrow.MonthDayNanoInterval{ + {Months: 1, Days: 1, Nanoseconds: 1000}, + {Months: 2, Days: 2, Nanoseconds: 2000}, + {Months: 3, Days: 3, Nanoseconds: 3000}, + {Months: 4, Days: 4, Nanoseconds: 4000}, + {Months: 5, Days: 5, Nanoseconds: 5000}}, + mask), }, - []arrow.Array{ + { arrayOf(mem, []arrow.MonthInterval{-11, -12, -13, -14, -15}, mask), - arrayOf(mem, []arrow.DayTimeInterval{{-11, -11}, {-12, -12}, {-13, -13}, {-14, -14}, {-15, -15}}, mask), - arrayOf(mem, []arrow.MonthDayNanoInterval{{-11, -11, -11000}, {-12, -12, -12000}, {-13, -13, -13000}, {-14, -14, -14000}, {-15, -15, -15000}}, mask), + arrayOf(mem, []arrow.DayTimeInterval{ + {Days: -11, Milliseconds: -11}, + {Days: -12, Milliseconds: -12}, + {Days: -13, Milliseconds: -13}, + {Days: -14, Milliseconds: -14}, + {Days: -15, Milliseconds: -15}}, + mask), + arrayOf(mem, []arrow.MonthDayNanoInterval{ + {Months: -11, Days: -11, Nanoseconds: -11000}, + {Months: -12, Days: -12, Nanoseconds: -12000}, + {Months: -13, Days: -13, Nanoseconds: -13000}, + {Months: -14, Days: -14, Nanoseconds: -14000}, + {Months: -15, Days: -15, Nanoseconds: -15000}}, mask), }, - []arrow.Array{ + { arrayOf(mem, []arrow.MonthInterval{21, 22, 23, 24, 25, 0}, append(mask, true)), - arrayOf(mem, []arrow.DayTimeInterval{{21, 21}, {22, 22}, {23, 23}, {24, 24}, {25, 25}, {0, 0}}, append(mask, true)), - arrayOf(mem, []arrow.MonthDayNanoInterval{{21, 21, 21000}, {22, 22, 22000}, {23, 23, 23000}, {24, 24, 24000}, {25, 25, 25000}, {0, 0, 0}}, append(mask, true)), + arrayOf(mem, []arrow.DayTimeInterval{ + {Days: 21, Milliseconds: 21}, + {Days: 22, Milliseconds: 22}, + {Days: 23, Milliseconds: 23}, + {Days: 24, Milliseconds: 24}, + {Days: 25, Milliseconds: 25}, + {Days: 0, Milliseconds: 0}}, append(mask, true)), + arrayOf(mem, []arrow.MonthDayNanoInterval{ + {Months: 21, Days: 21, 
Nanoseconds: 21000}, + {Months: 22, Days: 22, Nanoseconds: 22000}, + {Months: 23, Days: 23, Nanoseconds: 23000}, + {Months: 24, Days: 24, Nanoseconds: 24000}, + {Months: 25, Days: 25, Nanoseconds: 25000}, + {Months: 0, Days: 0, Nanoseconds: 0}}, append(mask, true)), }, } @@ -604,28 +640,28 @@ func makeDurationsRecords() []arrow.Record { schema := arrow.NewSchema( []arrow.Field{ - arrow.Field{Name: "durations-s", Type: &arrow.DurationType{Unit: arrow.Second}, Nullable: true}, - arrow.Field{Name: "durations-ms", Type: &arrow.DurationType{Unit: arrow.Millisecond}, Nullable: true}, - arrow.Field{Name: "durations-us", Type: &arrow.DurationType{Unit: arrow.Microsecond}, Nullable: true}, - arrow.Field{Name: "durations-ns", Type: &arrow.DurationType{Unit: arrow.Nanosecond}, Nullable: true}, + {Name: "durations-s", Type: &arrow.DurationType{Unit: arrow.Second}, Nullable: true}, + {Name: "durations-ms", Type: &arrow.DurationType{Unit: arrow.Millisecond}, Nullable: true}, + {Name: "durations-us", Type: &arrow.DurationType{Unit: arrow.Microsecond}, Nullable: true}, + {Name: "durations-ns", Type: &arrow.DurationType{Unit: arrow.Nanosecond}, Nullable: true}, }, nil, ) mask := []bool{true, false, false, true, true} chunks := [][]arrow.Array{ - []arrow.Array{ + { arrayOf(mem, []duration_s{1, 2, 3, 4, 5}, mask), arrayOf(mem, []duration_ms{1, 2, 3, 4, 5}, mask), arrayOf(mem, []duration_us{1, 2, 3, 4, 5}, mask), arrayOf(mem, []duration_ns{1, 2, 3, 4, 5}, mask), }, - []arrow.Array{ + { arrayOf(mem, []duration_s{11, 12, 13, 14, 15}, mask), arrayOf(mem, []duration_ms{11, 12, 13, 14, 15}, mask), arrayOf(mem, []duration_us{11, 12, 13, 14, 15}, mask), arrayOf(mem, []duration_ns{11, 12, 13, 14, 15}, mask), }, - []arrow.Array{ + { arrayOf(mem, []duration_s{21, 22, 23, 24, 25}, mask), arrayOf(mem, []duration_ms{21, 22, 23, 24, 25}, mask), arrayOf(mem, []duration_us{21, 22, 23, 24, 25}, mask), @@ -657,7 +693,7 @@ func makeDecimal128sRecords() []arrow.Record { mem := memory.NewGoAllocator() schema := arrow.NewSchema( []arrow.Field{ - arrow.Field{Name: "dec128s", Type: decimal128Type, Nullable: true}, + {Name: "dec128s", Type: decimal128Type, Nullable: true}, }, nil, ) @@ -671,13 +707,13 @@ func makeDecimal128sRecords() []arrow.Record { mask := []bool{true, false, false, true, true} chunks := [][]arrow.Array{ - []arrow.Array{ + { arrayOf(mem, dec128s([]int64{31, 32, 33, 34, 35}), mask), }, - []arrow.Array{ + { arrayOf(mem, dec128s([]int64{41, 42, 43, 44, 45}), mask), }, - []arrow.Array{ + { arrayOf(mem, dec128s([]int64{51, 52, 53, 54, 55}), mask), }, } @@ -899,6 +935,68 @@ func makeExtensionRecords() []arrow.Record { return recs } +func makeUnionRecords() []arrow.Record { + mem := memory.NewGoAllocator() + + unionFields := []arrow.Field{ + {Name: "u0", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, + {Name: "u1", Type: arrow.PrimitiveTypes.Uint8, Nullable: true}, + } + + typeCodes := []arrow.UnionTypeCode{5, 10} + sparseType := arrow.SparseUnionOf(unionFields, typeCodes) + denseType := arrow.DenseUnionOf(unionFields, typeCodes) + + schema := arrow.NewSchema([]arrow.Field{ + {Name: "sparse", Type: sparseType, Nullable: true}, + {Name: "dense", Type: denseType, Nullable: true}, + }, nil) + + sparseChildren := make([]arrow.Array, 4) + denseChildren := make([]arrow.Array, 4) + + const length = 7 + + typeIDsBuffer := memory.NewBufferBytes(arrow.Uint8Traits.CastToBytes([]uint8{5, 10, 5, 5, 10, 10, 5})) + sparseChildren[0] = arrayOf(mem, []int32{0, 1, 2, 3, 4, 5, 6}, + []bool{true, true, true, false, true, true, true}) + 
defer sparseChildren[0].Release() + sparseChildren[1] = arrayOf(mem, []uint8{10, 11, 12, 13, 14, 15, 16}, + nil) + defer sparseChildren[1].Release() + sparseChildren[2] = arrayOf(mem, []int32{0, -1, -2, -3, -4, -5, -6}, + []bool{true, true, true, true, true, true, false}) + defer sparseChildren[2].Release() + sparseChildren[3] = arrayOf(mem, []uint8{100, 101, 102, 103, 104, 105, 106}, + nil) + defer sparseChildren[3].Release() + + denseChildren[0] = arrayOf(mem, []int32{0, 2, 3, 7}, []bool{true, false, true, true}) + defer denseChildren[0].Release() + denseChildren[1] = arrayOf(mem, []uint8{11, 14, 15}, nil) + defer denseChildren[1].Release() + denseChildren[2] = arrayOf(mem, []int32{0, -2, -3, -7}, []bool{false, true, true, false}) + defer denseChildren[2].Release() + denseChildren[3] = arrayOf(mem, []uint8{101, 104, 105}, nil) + defer denseChildren[3].Release() + + offsetsBuffer := memory.NewBufferBytes(arrow.Int32Traits.CastToBytes([]int32{0, 0, 1, 2, 1, 2, 3})) + sparse1 := array.NewSparseUnion(sparseType, length, sparseChildren[:2], typeIDsBuffer, 0) + dense1 := array.NewDenseUnion(denseType, length, denseChildren[:2], typeIDsBuffer, offsetsBuffer, 0) + + sparse2 := array.NewSparseUnion(sparseType, length, sparseChildren[2:], typeIDsBuffer, 0) + dense2 := array.NewDenseUnion(denseType, length, denseChildren[2:], typeIDsBuffer, offsetsBuffer, 0) + + defer sparse1.Release() + defer dense1.Release() + defer sparse2.Release() + defer dense2.Release() + + return []arrow.Record{ + array.NewRecord(schema, []arrow.Array{sparse1, dense1}, -1), + array.NewRecord(schema, []arrow.Array{sparse2, dense2}, -1)} +} + func extArray(mem memory.Allocator, dt arrow.ExtensionType, a interface{}, valids []bool) arrow.Array { var storage arrow.Array switch st := dt.StorageType().(type) { @@ -1445,5 +1543,16 @@ func buildArray(bldr array.Builder, data arrow.Array) { bldr.AppendNull() } } + + case *array.LargeStringBuilder: + data := data.(*array.LargeString) + for i := 0; i < data.Len(); i++ { + switch { + case data.IsValid(i): + bldr.Append(data.Value(i)) + default: + bldr.AppendNull() + } + } } } diff --git a/go/arrow/internal/arrdata/ioutil.go b/go/arrow/internal/arrdata/ioutil.go index 12f363df9b87a..04a8634ada25f 100644 --- a/go/arrow/internal/arrdata/ioutil.go +++ b/go/arrow/internal/arrdata/ioutil.go @@ -23,11 +23,11 @@ import ( "sync" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v9/arrow/ipc" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v10/arrow/ipc" + "github.com/apache/arrow/go/v10/arrow/memory" ) // CheckArrowFile checks whether a given ARROW file contains the expected list of records. 
diff --git a/go/arrow/internal/arrjson/arrjson.go b/go/arrow/internal/arrjson/arrjson.go index a3e0f1bc8d1f8..e779bfb6b38e3 100644 --- a/go/arrow/internal/arrjson/arrjson.go +++ b/go/arrow/internal/arrjson/arrjson.go @@ -27,14 +27,15 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/decimal128" - "github.com/apache/arrow/go/v9/arrow/float16" - "github.com/apache/arrow/go/v9/arrow/internal/dictutils" - "github.com/apache/arrow/go/v9/arrow/ipc" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/decimal256" + "github.com/apache/arrow/go/v10/arrow/float16" + "github.com/apache/arrow/go/v10/arrow/internal/dictutils" + "github.com/apache/arrow/go/v10/arrow/ipc" + "github.com/apache/arrow/go/v10/arrow/memory" ) type Schema struct { @@ -151,8 +152,12 @@ func typeToJSON(arrowType arrow.DataType) (json.RawMessage, error) { typ = floatJSON{"floatingpoint", "DOUBLE"} case *arrow.BinaryType: typ = nameJSON{"binary"} + case *arrow.LargeBinaryType: + typ = nameJSON{"largebinary"} case *arrow.StringType: typ = nameJSON{"utf8"} + case *arrow.LargeStringType: + typ = nameJSON{"largeutf8"} case *arrow.Date32Type: typ = unitZoneJSON{Name: "date", Unit: "DAY"} case *arrow.Date64Type: @@ -201,6 +206,8 @@ func typeToJSON(arrowType arrow.DataType) (json.RawMessage, error) { } case *arrow.ListType: typ = nameJSON{"list"} + case *arrow.LargeListType: + typ = nameJSON{"largelist"} case *arrow.MapType: typ = mapJSON{Name: "map", KeysSorted: dt.KeysSorted} case *arrow.StructType: @@ -210,7 +217,11 @@ func typeToJSON(arrowType arrow.DataType) (json.RawMessage, error) { case *arrow.FixedSizeBinaryType: typ = byteWidthJSON{"fixedsizebinary", dt.ByteWidth} case *arrow.Decimal128Type: - typ = decimalJSON{"decimal", int(dt.Scale), int(dt.Precision)} + typ = decimalJSON{"decimal", int(dt.Scale), int(dt.Precision), 128} + case *arrow.Decimal256Type: + typ = decimalJSON{"decimal", int(dt.Scale), int(dt.Precision), 256} + case arrow.UnionType: + typ = unionJSON{"union", dt.Mode().String(), dt.TypeCodes()} default: return nil, fmt.Errorf("unknown arrow.DataType %v", arrowType) } @@ -319,8 +330,12 @@ func typeFromJSON(typ json.RawMessage, children []FieldWrapper) (arrowType arrow } case "binary": arrowType = arrow.BinaryTypes.Binary + case "largebinary": + arrowType = arrow.BinaryTypes.LargeBinary case "utf8": arrowType = arrow.BinaryTypes.String + case "largeutf8": + arrowType = arrow.BinaryTypes.LargeString case "date": t := unitZoneJSON{} if err = json.Unmarshal(typ, &t); err != nil { @@ -376,6 +391,13 @@ func typeFromJSON(typ json.RawMessage, children []FieldWrapper) (arrowType arrow Metadata: children[0].arrowMeta, Nullable: children[0].Nullable, }) + case "largelist": + arrowType = arrow.LargeListOfField(arrow.Field{ + Name: children[0].Name, + Type: children[0].arrowType, + Metadata: children[0].arrowMeta, + Nullable: children[0].Nullable, + }) case "map": t := mapJSON{} if err = json.Unmarshal(typ, &t); err != nil { @@ -436,10 +458,25 @@ func typeFromJSON(typ json.RawMessage, children []FieldWrapper) (arrowType arrow if err = json.Unmarshal(typ, &t); err != nil { return } - arrowType = &arrow.Decimal128Type{Precision: int32(t.Precision), Scale: 
int32(t.Scale)} + switch t.BitWidth { + case 256: + arrowType = &arrow.Decimal256Type{Precision: int32(t.Precision), Scale: int32(t.Scale)} + case 128, 0: // default to 128 bits when missing + arrowType = &arrow.Decimal128Type{Precision: int32(t.Precision), Scale: int32(t.Scale)} + } + case "union": + t := unionJSON{} + if err = json.Unmarshal(typ, &t); err != nil { + return + } + switch t.Mode { + case "SPARSE": + arrowType = arrow.SparseUnionOf(fieldsFromJSON(children), t.TypeIDs) + case "DENSE": + arrowType = arrow.DenseUnionOf(fieldsFromJSON(children), t.TypeIDs) + } } - if arrowType == nil { err = fmt.Errorf("unhandled type unmarshalling from json: %s", tmp.Name) } @@ -561,6 +598,7 @@ type decimalJSON struct { Name string `json:"name"` Scale int `json:"scale,omitempty"` Precision int `json:"precision,omitempty"` + BitWidth int `json:"bitWidth,omitempty"` } type byteWidthJSON struct { @@ -573,6 +611,12 @@ type mapJSON struct { KeysSorted bool `json:"keysSorted,omitempty"` } +type unionJSON struct { + Name string `json:"name"` + Mode string `json:"mode"` + TypeIDs []arrow.UnionTypeCode `json:"typeIds"` +} + func schemaToJSON(schema *arrow.Schema, mapper *dictutils.Mapper) Schema { return Schema{ Fields: fieldsToJSON(schema.Fields(), dictutils.NewFieldPos(), mapper), @@ -639,15 +683,8 @@ func fieldsToJSON(fields []arrow.Field, parentPos dictutils.FieldPos, mapper *di } } - switch dt := typ.(type) { - case *arrow.ListType: - o[i].Children = fieldsToJSON([]arrow.Field{dt.ElemField()}, pos, mapper) - case *arrow.FixedSizeListType: - o[i].Children = fieldsToJSON([]arrow.Field{dt.ElemField()}, pos, mapper) - case *arrow.StructType: + if dt, ok := typ.(arrow.NestedType); ok { o[i].Children = fieldsToJSON(dt.Fields(), pos, mapper) - case *arrow.MapType: - o[i].Children = fieldsToJSON([]arrow.Field{dt.ValueField()}, pos, mapper) } } return o @@ -724,12 +761,70 @@ func recordToJSON(rec arrow.Record) Record { } type Array struct { - Name string `json:"name"` - Count int `json:"count"` - Valids []int `json:"VALIDITY,omitempty"` - Data []interface{} `json:"DATA,omitempty"` - Offset []int32 `json:"OFFSET,omitempty"` - Children []Array `json:"children,omitempty"` + Name string `json:"name"` + Count int `json:"count"` + Valids []int `json:"VALIDITY,omitempty"` + Data []interface{} `json:"DATA,omitempty"` + TypeID []arrow.UnionTypeCode `json:"TYPE_ID,omitempty"` + Offset interface{} `json:"OFFSET,omitempty"` + Children []Array `json:"children,omitempty"` +} + +func (a *Array) MarshalJSON() ([]byte, error) { + type Alias Array + aux := struct { + *Alias + OutOffset interface{} `json:"OFFSET,omitempty"` + }{Alias: (*Alias)(a), OutOffset: a.Offset} + return json.Marshal(aux) +} + +func (a *Array) UnmarshalJSON(b []byte) (err error) { + type Alias Array + aux := &struct { + *Alias + RawOffset json.RawMessage `json:"OFFSET,omitempty"` + }{Alias: (*Alias)(a)} + + dec := json.NewDecoder(bytes.NewReader(b)) + dec.UseNumber() + + if err = dec.Decode(&aux); err != nil { + return + } + + if len(aux.RawOffset) == 0 { + return + } + + var rawOffsets []interface{} + if err = json.Unmarshal(aux.RawOffset, &rawOffsets); err != nil { + return + } + + if len(rawOffsets) == 0 { + return + } + + switch rawOffsets[0].(type) { + case string: + out := make([]int64, len(rawOffsets)) + for i, o := range rawOffsets { + out[i], err = strconv.ParseInt(o.(string), 10, 64) + if err != nil { + return + } + } + a.Offset = out + case float64: + out := make([]int32, len(rawOffsets)) + for i, o := range rawOffsets { + out[i] = 
int32(o.(float64)) + } + a.Offset = out + } + + return nil } func arraysFromJSON(mem memory.Allocator, schema *arrow.Schema, arrs []Array) []arrow.ArrayData { @@ -874,6 +969,22 @@ func arrayFromJSON(mem memory.Allocator, dt arrow.DataType, arr Array) arrow.Arr bldr.AppendValues(data, valids) return returnNewArrayData(bldr) + case *arrow.LargeStringType: + bldr := array.NewLargeStringBuilder(mem) + defer bldr.Release() + data := strFromJSON(arr.Data) + valids := validsFromJSON(arr.Valids) + bldr.AppendValues(data, valids) + return returnNewArrayData(bldr) + + case *arrow.LargeBinaryType: + bldr := array.NewBinaryBuilder(mem, dt) + defer bldr.Release() + data := bytesFromJSON(arr.Data) + valids := validsFromJSON(arr.Valids) + bldr.AppendValues(data, valids) + return returnNewArrayData(bldr) + case *arrow.BinaryType: bldr := array.NewBinaryBuilder(mem, dt) defer bldr.Release() @@ -892,7 +1003,20 @@ func arrayFromJSON(mem memory.Allocator, dt arrow.DataType, arr Array) arrow.Arr nulls := arr.Count - bitutil.CountSetBits(bitmap.Bytes(), 0, arr.Count) return array.NewData(dt, arr.Count, []*memory.Buffer{bitmap, - memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(arr.Offset))}, + memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(arr.Offset.([]int32)))}, + []arrow.ArrayData{elems}, nulls, 0) + + case *arrow.LargeListType: + valids := validsFromJSON(arr.Valids) + elems := arrayFromJSON(mem, dt.Elem(), arr.Children[0]) + defer elems.Release() + + bitmap := validsToBitmap(valids, mem) + defer bitmap.Release() + + nulls := arr.Count - bitutil.CountSetBits(bitmap.Bytes(), 0, arr.Count) + return array.NewData(dt, arr.Count, []*memory.Buffer{bitmap, + memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(arr.Offset.([]int64)))}, []arrow.ArrayData{elems}, nulls, 0) case *arrow.FixedSizeListType: @@ -951,7 +1075,7 @@ func arrayFromJSON(mem memory.Allocator, dt arrow.DataType, arr Array) arrow.Arr nulls := arr.Count - bitutil.CountSetBits(bitmap.Bytes(), 0, arr.Count) return array.NewData(dt, arr.Count, []*memory.Buffer{bitmap, - memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(arr.Offset))}, + memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(arr.Offset.([]int32)))}, []arrow.ArrayData{elems}, nulls, 0) case *arrow.Date32Type: @@ -1034,6 +1158,14 @@ func arrayFromJSON(mem memory.Allocator, dt arrow.DataType, arr Array) arrow.Arr bldr.AppendValues(data, valids) return returnNewArrayData(bldr) + case *arrow.Decimal256Type: + bldr := array.NewDecimal256Builder(mem, dt) + defer bldr.Release() + data := decimal256FromJSON(arr.Data) + valids := validsFromJSON(arr.Valids) + bldr.AppendValues(data, valids) + return returnNewArrayData(bldr) + case arrow.ExtensionType: storage := arrayFromJSON(mem, dt.StorageType(), arr) defer storage.Release() @@ -1044,6 +1176,31 @@ func arrayFromJSON(mem memory.Allocator, dt arrow.DataType, arr Array) arrow.Arr defer indices.Release() return array.NewData(dt, indices.Len(), indices.Buffers(), indices.Children(), indices.NullN(), indices.Offset()) + case arrow.UnionType: + fields := make([]arrow.ArrayData, len(dt.Fields())) + for i, f := range dt.Fields() { + child := arrayFromJSON(mem, f.Type, arr.Children[i]) + defer child.Release() + fields[i] = child + } + + typeIdBuf := memory.NewBufferBytes(arrow.Int8Traits.CastToBytes(arr.TypeID)) + defer typeIdBuf.Release() + buffers := []*memory.Buffer{nil, typeIdBuf} + if dt.Mode() == arrow.DenseMode { + var offsets []byte + if arr.Offset == nil { + offsets = []byte{} + } else { + offsets = 
arrow.Int32Traits.CastToBytes(arr.Offset.([]int32)) + } + offsetBuf := memory.NewBufferBytes(offsets) + defer offsetBuf.Release() + buffers = append(buffers, offsetBuf) + } + + return array.NewData(dt, arr.Count, buffers, fields, 0, 0) + default: panic(fmt.Errorf("unknown data type %v %T", dt, dt)) } @@ -1159,6 +1316,21 @@ func arrayToJSON(field arrow.Field, arr arrow.Array) Array { Count: arr.Len(), Data: strToJSON(arr), Valids: validsToJSON(arr), + Offset: arr.ValueOffsets(), + } + + case *array.LargeString: + offsets := arr.ValueOffsets() + strOffsets := make([]string, len(offsets)) + for i, o := range offsets { + strOffsets[i] = strconv.FormatInt(o, 10) + } + return Array{ + Name: field.Name, + Count: arr.Len(), + Data: strToJSON(arr), + Valids: validsToJSON(arr), + Offset: strOffsets, } case *array.Binary: @@ -1170,6 +1342,20 @@ func arrayToJSON(field arrow.Field, arr arrow.Array) Array { Offset: arr.ValueOffsets(), } + case *array.LargeBinary: + offsets := arr.ValueOffsets() + strOffsets := make([]string, len(offsets)) + for i, o := range offsets { + strOffsets[i] = strconv.FormatInt(o, 10) + } + return Array{ + Name: field.Name, + Count: arr.Len(), + Data: bytesToJSON(arr), + Valids: validsToJSON(arr), + Offset: strOffsets, + } + case *array.List: o := Array{ Name: field.Name, @@ -1182,6 +1368,22 @@ func arrayToJSON(field arrow.Field, arr arrow.Array) Array { } return o + case *array.LargeList: + offsets := arr.Offsets() + strOffsets := make([]string, len(offsets)) + for i, o := range offsets { + strOffsets[i] = strconv.FormatInt(o, 10) + } + return Array{ + Name: field.Name, + Count: arr.Len(), + Valids: validsToJSON(arr), + Offset: strOffsets, + Children: []Array{ + arrayToJSON(arrow.Field{Name: "item", Type: arr.DataType().(*arrow.LargeListType).Elem()}, arr.ListValues()), + }, + } + case *array.Map: o := Array{ Name: field.Name, @@ -1311,12 +1513,38 @@ func arrayToJSON(field arrow.Field, arr arrow.Array) Array { Valids: validsToJSON(arr), } + case *array.Decimal256: + return Array{ + Name: field.Name, + Count: arr.Len(), + Data: decimal256ToJSON(arr), + Valids: validsToJSON(arr), + } + case array.ExtensionArray: return arrayToJSON(field, arr.Storage()) case *array.Dictionary: return arrayToJSON(field, arr.Indices()) + case array.Union: + dt := arr.DataType().(arrow.UnionType) + o := Array{ + Name: field.Name, + Count: arr.Len(), + Valids: validsToJSON(arr), + TypeID: arr.RawTypeCodes(), + Children: make([]Array, len(dt.Fields())), + } + if dt.Mode() == arrow.DenseMode { + o.Offset = arr.(*array.DenseUnion).RawValueOffsets() + } + fields := dt.Fields() + for i := range o.Children { + o.Children[i] = arrayToJSON(fields[i], arr.Field(i)) + } + return o + default: panic(fmt.Errorf("unknown array type %T", arr)) } @@ -1607,6 +1835,27 @@ func decimal128FromJSON(vs []interface{}) []decimal128.Num { return o } +func decimal256ToJSON(arr *array.Decimal256) []interface{} { + o := make([]interface{}, arr.Len()) + for i := range o { + o[i] = arr.Value(i).BigInt().String() + } + return o +} + +func decimal256FromJSON(vs []interface{}) []decimal256.Num { + var tmp big.Int + o := make([]decimal256.Num, len(vs)) + for i, v := range vs { + if err := tmp.UnmarshalJSON([]byte(v.(string))); err != nil { + panic(fmt.Errorf("could not convert %v (%T) to decimal128: %w", v, v, err)) + } + + o[i] = decimal256.FromBigInt(&tmp) + } + return o +} + func strFromJSON(vs []interface{}) []string { o := make([]string, len(vs)) for i, v := range vs { @@ -1622,7 +1871,12 @@ func strFromJSON(vs []interface{}) 
[]string { return o } -func strToJSON(arr *array.String) []interface{} { +type strlike interface { + arrow.Array + Value(int) string +} + +func strToJSON(arr strlike) []interface{} { o := make([]interface{}, arr.Len()) for i := range o { o[i] = arr.Value(i) @@ -1649,7 +1903,12 @@ func bytesFromJSON(vs []interface{}) [][]byte { return o } -func bytesToJSON(arr *array.Binary) []interface{} { +type binarylike interface { + arrow.Array + Value(int) []byte +} + +func bytesToJSON(arr binarylike) []interface{} { o := make([]interface{}, arr.Len()) for i := range o { o[i] = strings.ToUpper(hex.EncodeToString(arr.Value(i))) diff --git a/go/arrow/internal/arrjson/arrjson_test.go b/go/arrow/internal/arrjson/arrjson_test.go index ac016db1a1518..15bc3d4547c06 100644 --- a/go/arrow/internal/arrjson/arrjson_test.go +++ b/go/arrow/internal/arrjson/arrjson_test.go @@ -20,13 +20,12 @@ import ( "errors" "io" "io/ioutil" - "os" - "strings" "testing" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/internal/arrdata" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/internal/arrdata" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/stretchr/testify/assert" ) func TestReadWrite(t *testing.T) { @@ -45,12 +44,9 @@ func TestReadWrite(t *testing.T) { wantJSONs["maps"] = makeMapsWantJSONs() wantJSONs["extension"] = makeExtensionsWantJSONs() wantJSONs["dictionary"] = makeDictionaryWantJSONs() + wantJSONs["union"] = makeUnionWantJSONs() - tempDir, err := ioutil.TempDir("", "go-arrow-read-write-") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(tempDir) + tempDir := t.TempDir() for name, recs := range arrdata.Records { t.Run(name, func(t *testing.T) { @@ -87,9 +83,7 @@ func TestReadWrite(t *testing.T) { } fileBytes, _ := ioutil.ReadFile(f.Name()) - if wantJSONs[name] != strings.TrimSpace(string(fileBytes)) { - t.Fatalf("not expected JSON pretty output for case: %v", name) - } + assert.JSONEq(t, wantJSONs[name], string(fileBytes)) _, err = f.Seek(0, io.SeekStart) if err != nil { @@ -1115,6 +1109,34 @@ func makeStructsWantJSONs() string { "", "4444", "4555" + ], + "OFFSET": [ + 0, + 3, + 3, + 3, + 6, + 9, + 13, + 13, + 13, + 17, + 21, + 25, + 25, + 25, + 29, + 33, + 37, + 37, + 37, + 41, + 45, + 49, + 49, + 49, + 53, + 57 ] } ] @@ -1269,6 +1291,34 @@ func makeStructsWantJSONs() string { "", "-4444", "-4555" + ], + "OFFSET": [ + 0, + 4, + 4, + 4, + 8, + 12, + 17, + 17, + 17, + 22, + 27, + 32, + 32, + 32, + 37, + 42, + 47, + 47, + 47, + 52, + 57, + 62, + 62, + 62, + 67, + 72 ] } ] @@ -1315,13 +1365,7 @@ func makeListsWantJSONs() string { 1, 1, 1 - ], - "OFFSET": [ - 0, - 5, - 10, - 15 - ], + ], "children": [ { "name": "item", @@ -1361,6 +1405,12 @@ func makeListsWantJSONs() string { 25 ] } + ], + "OFFSET": [ + 0, + 5, + 10, + 15 ] } ] @@ -1376,12 +1426,6 @@ func makeListsWantJSONs() string { 1, 1 ], - "OFFSET": [ - 0, - 5, - 10, - 15 - ], "children": [ { "name": "item", @@ -1421,6 +1465,12 @@ func makeListsWantJSONs() string { -25 ] } + ], + "OFFSET": [ + 0, + 5, + 10, + 15 ] } ] @@ -1436,12 +1486,6 @@ func makeListsWantJSONs() string { 0, 1 ], - "OFFSET": [ - 0, - 5, - 10, - 15 - ], "children": [ { "name": "item", @@ -1481,6 +1525,12 @@ func makeListsWantJSONs() string { -25 ] } + ], + "OFFSET": [ + 0, + 5, + 10, + 15 ] } ] @@ -1491,14 +1541,14 @@ func makeListsWantJSONs() string { { "name": "list_nullable", "count": 0, - "OFFSET": [ - 0 - ], "children": [ { "name": "item", "count": 
0 } + ], + "OFFSET": [ + 0 ] } ] @@ -1706,6 +1756,14 @@ func makeStringsWantJSONs() string { "3", "4", "5" + ], + "OFFSET": [ + 0, + 3, + 4, + 5, + 6, + 7 ] }, { @@ -1755,6 +1813,14 @@ func makeStringsWantJSONs() string { "33", "44", "55" + ], + "OFFSET": [ + 0, + 2, + 4, + 6, + 8, + 10 ] }, { @@ -1804,6 +1870,14 @@ func makeStringsWantJSONs() string { "333", "444", "555" + ], + "OFFSET": [ + 0, + 3, + 6, + 9, + 12, + 15 ] }, { @@ -3274,7 +3348,8 @@ func makeDecimal128sWantJSONs() string { "type": { "name": "decimal", "scale": 1, - "precision": 10 + "precision": 10, + "bitWidth": 128 }, "nullable": true, "children": [] @@ -3408,12 +3483,7 @@ func makeMapsWantJSONs() string { "VALIDITY": [ 1, 0 - ], - "OFFSET": [ - 0, - 25, - 50 - ], + ], "children": [ { "name": "entries", @@ -3685,10 +3755,68 @@ func makeMapsWantJSONs() string { "", "-4444", "-4555" + ], + "OFFSET": [ + 0, + 3, + 3, + 3, + 6, + 9, + 13, + 13, + 13, + 17, + 21, + 25, + 25, + 25, + 29, + 33, + 37, + 37, + 37, + 41, + 45, + 49, + 49, + 49, + 53, + 57, + 61, + 61, + 61, + 65, + 69, + 74, + 74, + 74, + 79, + 84, + 89, + 89, + 89, + 94, + 99, + 104, + 104, + 104, + 109, + 114, + 119, + 119, + 119, + 124, + 129 ] } ] } + ], + "OFFSET": [ + 0, + 25, + 50 ] } ] @@ -3703,11 +3831,6 @@ func makeMapsWantJSONs() string { 1, 0 ], - "OFFSET": [ - 0, - 25, - 50 - ], "children": [ { "name": "entries", @@ -3979,10 +4102,68 @@ func makeMapsWantJSONs() string { "", "4444", "4555" + ], + "OFFSET": [ + 0, + 4, + 4, + 4, + 8, + 12, + 17, + 17, + 17, + 22, + 27, + 32, + 32, + 32, + 37, + 42, + 47, + 47, + 47, + 52, + 57, + 62, + 62, + 62, + 67, + 72, + 75, + 75, + 75, + 78, + 81, + 85, + 85, + 85, + 89, + 93, + 97, + 97, + 97, + 101, + 105, + 109, + 109, + 109, + 113, + 117, + 121, + 121, + 121, + 125, + 129 ] } ] } + ], + "OFFSET": [ + 0, + 25, + 50 ] } ] @@ -4900,3 +5081,352 @@ func makeExtensionsWantJSONs() string { ] }` } + +func makeUnionWantJSONs() string { + return `{ + "schema": { + "fields": [ + { + "name": "sparse", + "type": { + "name": "union", + "mode": "SPARSE", + "typeIds": [ + 5, + 10 + ] + }, + "nullable": true, + "children": [ + { + "name": "u0", + "type": { + "name": "int", + "isSigned": true, + "bitWidth": 32 + }, + "nullable": true, + "children": [] + }, + { + "name": "u1", + "type": { + "name": "int", + "bitWidth": 8 + }, + "nullable": true, + "children": [] + } + ] + }, + { + "name": "dense", + "type": { + "name": "union", + "mode": "DENSE", + "typeIds": [ + 5, + 10 + ] + }, + "nullable": true, + "children": [ + { + "name": "u0", + "type": { + "name": "int", + "isSigned": true, + "bitWidth": 32 + }, + "nullable": true, + "children": [] + }, + { + "name": "u1", + "type": { + "name": "int", + "bitWidth": 8 + }, + "nullable": true, + "children": [] + } + ] + } + ] + }, + "batches": [ + { + "count": 7, + "columns": [ + { + "name": "sparse", + "count": 7, + "VALIDITY": [ + 1, + 1, + 1, + 1, + 1, + 1, + 1 + ], + "TYPE_ID": [ + 5, + 10, + 5, + 5, + 10, + 10, + 5 + ], + "children": [ + { + "name": "u0", + "count": 7, + "VALIDITY": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1 + ], + "DATA": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6 + ] + }, + { + "name": "u1", + "count": 7, + "VALIDITY": [ + 1, + 1, + 1, + 1, + 1, + 1, + 1 + ], + "DATA": [ + 10, + 11, + 12, + 13, + 14, + 15, + 16 + ] + } + ] + }, + { + "name": "dense", + "count": 7, + "VALIDITY": [ + 1, + 1, + 1, + 1, + 1, + 1, + 1 + ], + "TYPE_ID": [ + 5, + 10, + 5, + 5, + 10, + 10, + 5 + ], + "OFFSET": [ + 0, + 0, + 1, + 2, + 1, + 2, + 3 + ], + "children": [ + { + "name": "u0", + "count": 4, + 
"VALIDITY": [ + 1, + 0, + 1, + 1 + ], + "DATA": [ + 0, + 2, + 3, + 7 + ] + }, + { + "name": "u1", + "count": 3, + "VALIDITY": [ + 1, + 1, + 1 + ], + "DATA": [ + 11, + 14, + 15 + ] + } + ] + } + ] + }, + { + "count": 7, + "columns": [ + { + "name": "sparse", + "count": 7, + "VALIDITY": [ + 1, + 1, + 1, + 1, + 1, + 1, + 1 + ], + "TYPE_ID": [ + 5, + 10, + 5, + 5, + 10, + 10, + 5 + ], + "children": [ + { + "name": "u0", + "count": 7, + "VALIDITY": [ + 1, + 1, + 1, + 1, + 1, + 1, + 0 + ], + "DATA": [ + 0, + -1, + -2, + -3, + -4, + -5, + -6 + ] + }, + { + "name": "u1", + "count": 7, + "VALIDITY": [ + 1, + 1, + 1, + 1, + 1, + 1, + 1 + ], + "DATA": [ + 100, + 101, + 102, + 103, + 104, + 105, + 106 + ] + } + ] + }, + { + "name": "dense", + "count": 7, + "VALIDITY": [ + 1, + 1, + 1, + 1, + 1, + 1, + 1 + ], + "TYPE_ID": [ + 5, + 10, + 5, + 5, + 10, + 10, + 5 + ], + "OFFSET": [ + 0, + 0, + 1, + 2, + 1, + 2, + 3 + ], + "children": [ + { + "name": "u0", + "count": 4, + "VALIDITY": [ + 0, + 1, + 1, + 0 + ], + "DATA": [ + 0, + -2, + -3, + -7 + ] + }, + { + "name": "u1", + "count": 3, + "VALIDITY": [ + 1, + 1, + 1 + ], + "DATA": [ + 101, + 104, + 105 + ] + } + ] + } + ] + } + ] +}` +} diff --git a/go/arrow/internal/arrjson/option.go b/go/arrow/internal/arrjson/option.go index 75c826d0a29a5..865a264fe4576 100644 --- a/go/arrow/internal/arrjson/option.go +++ b/go/arrow/internal/arrjson/option.go @@ -17,8 +17,8 @@ package arrjson import ( - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/memory" ) type config struct { diff --git a/go/arrow/internal/arrjson/reader.go b/go/arrow/internal/arrjson/reader.go index c667b9a118692..82a1c7f5bc03f 100644 --- a/go/arrow/internal/arrjson/reader.go +++ b/go/arrow/internal/arrjson/reader.go @@ -21,10 +21,10 @@ import ( "io" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/arrio" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/internal/dictutils" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/arrio" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/internal/dictutils" ) type Reader struct { diff --git a/go/arrow/internal/arrjson/writer.go b/go/arrow/internal/arrjson/writer.go index 86cb65fd03280..e7a356f92880a 100644 --- a/go/arrow/internal/arrjson/writer.go +++ b/go/arrow/internal/arrjson/writer.go @@ -21,10 +21,10 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/arrio" - "github.com/apache/arrow/go/v9/arrow/internal/dictutils" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/arrio" + "github.com/apache/arrow/go/v10/arrow/internal/dictutils" ) const ( diff --git a/go/arrow/internal/dictutils/dict.go b/go/arrow/internal/dictutils/dict.go index 6a4bbf3a359dc..9dd17c6540ea3 100644 --- a/go/arrow/internal/dictutils/dict.go +++ b/go/arrow/internal/dictutils/dict.go @@ -21,9 +21,9 @@ import ( "fmt" "hash/maphash" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" ) type Kind int8 diff --git 
a/go/arrow/internal/dictutils/dict_test.go b/go/arrow/internal/dictutils/dict_test.go index bcd54e17630d6..3413c5bcb0727 100644 --- a/go/arrow/internal/dictutils/dict_test.go +++ b/go/arrow/internal/dictutils/dict_test.go @@ -20,10 +20,10 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/internal/dictutils" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/internal/dictutils" + "github.com/apache/arrow/go/v10/arrow/memory" ) func TestDictMemo(t *testing.T) { diff --git a/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-client/main.go b/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-client/main.go index e811917dcdfaa..da3bd564bd2c4 100755 --- a/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-client/main.go +++ b/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-client/main.go @@ -22,7 +22,7 @@ import ( "fmt" "time" - "github.com/apache/arrow/go/v9/arrow/internal/flight_integration" + "github.com/apache/arrow/go/v10/arrow/internal/flight_integration" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" ) diff --git a/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-server/main.go b/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-server/main.go index 9ff087187ec16..5ed219099be4d 100644 --- a/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-server/main.go +++ b/go/arrow/internal/flight_integration/cmd/arrow-flight-integration-server/main.go @@ -23,7 +23,7 @@ import ( "os" "syscall" - "github.com/apache/arrow/go/v9/arrow/internal/flight_integration" + "github.com/apache/arrow/go/v10/arrow/internal/flight_integration" ) var ( diff --git a/go/arrow/internal/flight_integration/scenario.go b/go/arrow/internal/flight_integration/scenario.go index 93297fc90bdc4..4e96d7100abb6 100644 --- a/go/arrow/internal/flight_integration/scenario.go +++ b/go/arrow/internal/flight_integration/scenario.go @@ -24,15 +24,19 @@ import ( "io" "net" "os" + "reflect" "strconv" - - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/flight" - "github.com/apache/arrow/go/v9/arrow/internal/arrjson" - "github.com/apache/arrow/go/v9/arrow/internal/testing/types" - "github.com/apache/arrow/go/v9/arrow/ipc" - "github.com/apache/arrow/go/v9/arrow/memory" + "strings" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/flight" + "github.com/apache/arrow/go/v10/arrow/flight/flightsql" + "github.com/apache/arrow/go/v10/arrow/flight/flightsql/schema_ref" + "github.com/apache/arrow/go/v10/arrow/internal/arrjson" + "github.com/apache/arrow/go/v10/arrow/internal/testing/types" + "github.com/apache/arrow/go/v10/arrow/ipc" + "github.com/apache/arrow/go/v10/arrow/memory" "golang.org/x/xerrors" "google.golang.org/grpc" "google.golang.org/grpc/codes" @@ -51,6 +55,8 @@ func GetScenario(name string, args ...string) Scenario { return &authBasicProtoTester{} case "middleware": return &middlewareScenarioTester{} + case "flight_sql": + return &flightSqlScenarioTester{} case "": if len(args) > 0 { return &defaultIntegrationTester{path: args[0]} @@ -517,3 +523,640 @@ func (m *middlewareScenarioTester) GetFlightInfo(ctx context.Context, desc 
*flig TotalBytes: -1, }, nil } + +var ( + // Schema to be returned for mocking the statement/prepared statement + // results. Must be the same across all languages + QuerySchema = arrow.NewSchema([]arrow.Field{{ + Name: "id", Type: arrow.PrimitiveTypes.Int64, Nullable: true, + Metadata: flightsql.NewColumnMetadataBuilder(). + TableName("test").IsAutoIncrement(true).IsCaseSensitive(false). + TypeName("type_test").SchemaName("schema_test").IsSearchable(true). + CatalogName("catalog_test").Precision(100).Metadata(), + }}, nil) +) + +const ( + updateStatementExpectedRows int64 = 10000 + updatePreparedStatementExpectedRows int64 = 20000 +) + +type flightSqlScenarioTester struct { + flightsql.BaseServer +} + +func (m *flightSqlScenarioTester) flightInfoForCommand(desc *flight.FlightDescriptor, schema *arrow.Schema) *flight.FlightInfo { + return &flight.FlightInfo{ + Endpoint: []*flight.FlightEndpoint{ + {Ticket: &flight.Ticket{Ticket: desc.Cmd}}, + }, + Schema: flight.SerializeSchema(schema, memory.DefaultAllocator), + FlightDescriptor: desc, + TotalRecords: -1, + TotalBytes: -1, + } +} + +func (m *flightSqlScenarioTester) MakeServer(port int) flight.Server { + srv := flight.NewServerWithMiddleware(nil) + srv.RegisterFlightService(flightsql.NewFlightServer(m)) + initServer(port, srv) + return srv +} + +func assertEq(expected, actual interface{}) error { + v := reflect.Indirect(reflect.ValueOf(actual)) + if !reflect.DeepEqual(expected, v.Interface()) { + return fmt.Errorf("expected: '%s', got: '%s'", expected, actual) + } + return nil +} + +func (m *flightSqlScenarioTester) RunClient(addr string, opts ...grpc.DialOption) error { + client, err := flightsql.NewClient(addr, nil, nil, opts...) + if err != nil { + return err + } + defer client.Close() + + if err := m.ValidateMetadataRetrieval(client); err != nil { + return err + } + + if err := m.ValidateStatementExecution(client); err != nil { + return err + } + + return m.ValidatePreparedStatementExecution(client) +} + +func (m *flightSqlScenarioTester) validate(expected *arrow.Schema, result *flight.FlightInfo, client *flightsql.Client) error { + rdr, err := client.DoGet(context.Background(), result.Endpoint[0].Ticket) + if err != nil { + return err + } + + if !expected.Equal(rdr.Schema()) { + return fmt.Errorf("expected: %s, got: %s", expected, rdr.Schema()) + } + for { + _, err := rdr.Read() + if err == io.EOF { break } + if err != nil { return err } + } + return nil +} + +func (m *flightSqlScenarioTester) validateSchema(expected *arrow.Schema, result *flight.SchemaResult) error { + schema, err := flight.DeserializeSchema(result.GetSchema(), memory.DefaultAllocator) + if err != nil { + return err + } + if !expected.Equal(schema) { + return fmt.Errorf("expected: %s, got: %s", expected, schema) + } + return nil +} + +func (m *flightSqlScenarioTester) ValidateMetadataRetrieval(client *flightsql.Client) error { + var ( + catalog = "catalog" + dbSchemaFilterPattern = "db_schema_filter_pattern" + tableFilterPattern = "table_filter_pattern" + table = "table" + dbSchema = "db_schema" + tableTypes = []string{"table", "view"} + + ref = flightsql.TableRef{Catalog: &catalog, DBSchema: &dbSchema, Table: table} + pkRef = flightsql.TableRef{Catalog: proto.String("pk_catalog"), DBSchema: proto.String("pk_db_schema"), Table: "pk_table"} + fkRef = flightsql.TableRef{Catalog: proto.String("fk_catalog"), DBSchema: proto.String("fk_db_schema"), Table: "fk_table"} + + ctx = context.Background() + ) + + info, err := client.GetCatalogs(ctx) + if err != nil { + return 
err + } + if err := m.validate(schema_ref.Catalogs, info, client); err != nil { + return err + } + + schema, err := client.GetCatalogsSchema(ctx) + if err != nil { + return err + } + if err := m.validateSchema(schema_ref.Catalogs, schema); err != nil { + return err + } + + info, err = client.GetDBSchemas(ctx, &flightsql.GetDBSchemasOpts{Catalog: &catalog, DbSchemaFilterPattern: &dbSchemaFilterPattern}) + if err != nil { + return err + } + if err = m.validate(schema_ref.DBSchemas, info, client); err != nil { + return err + } + + schema, err = client.GetDBSchemasSchema(ctx) + if err != nil { + return err + } + if err = m.validateSchema(schema_ref.DBSchemas, schema); err != nil { + return err + } + + info, err = client.GetTables(ctx, &flightsql.GetTablesOpts{Catalog: &catalog, DbSchemaFilterPattern: &dbSchemaFilterPattern, TableNameFilterPattern: &tableFilterPattern, IncludeSchema: true, TableTypes: tableTypes}) + if err != nil { + return err + } + if err = m.validate(schema_ref.TablesWithIncludedSchema, info, client); err != nil { + return err + } + + schema, err = client.GetTablesSchema(ctx, &flightsql.GetTablesOpts{IncludeSchema: true}) + if err != nil { + return err + } + if err = m.validateSchema(schema_ref.TablesWithIncludedSchema, schema); err != nil { + return err + } + + schema, err = client.GetTablesSchema(ctx, &flightsql.GetTablesOpts{IncludeSchema: false}) + if err != nil { + return err + } + if err = m.validateSchema(schema_ref.Tables, schema); err != nil { + return err + } + + info, err = client.GetTableTypes(ctx) + if err != nil { + return err + } + if err = m.validate(schema_ref.TableTypes, info, client); err != nil { + return err + } + + schema, err = client.GetTableTypesSchema(ctx) + if err != nil { + return err + } + if err = m.validateSchema(schema_ref.TableTypes, schema); err != nil { + return err + } + + info, err = client.GetPrimaryKeys(ctx, ref) + if err != nil { + return err + } + if err = m.validate(schema_ref.PrimaryKeys, info, client); err != nil { + return err + } + + schema, err = client.GetPrimaryKeysSchema(ctx) + if err != nil { + return err + } + if err = m.validateSchema(schema_ref.PrimaryKeys, schema); err != nil { + return err + } + + info, err = client.GetExportedKeys(ctx, ref) + if err != nil { + return err + } + if err = m.validate(schema_ref.ExportedKeys, info, client); err != nil { + return err + } + + schema, err = client.GetExportedKeysSchema(ctx) + if err != nil { + return err + } + if err = m.validateSchema(schema_ref.ExportedKeys, schema); err != nil { + return err + } + + info, err = client.GetImportedKeys(ctx, ref) + if err != nil { + return err + } + if err = m.validate(schema_ref.ImportedKeys, info, client); err != nil { + return err + } + + schema, err = client.GetImportedKeysSchema(ctx) + if err != nil { + return err + } + if err = m.validateSchema(schema_ref.ImportedKeys, schema); err != nil { + return err + } + + info, err = client.GetCrossReference(ctx, pkRef, fkRef) + if err != nil { + return err + } + if err = m.validate(schema_ref.CrossReference, info, client); err != nil { + return err + } + + schema, err = client.GetCrossReferenceSchema(ctx) + if err != nil { + return err + } + if err = m.validateSchema(schema_ref.CrossReference, schema); err != nil { + return err + } + + info, err = client.GetXdbcTypeInfo(ctx, nil) + if err != nil { + return err + } + if err = m.validate(schema_ref.XdbcTypeInfo, info, client); err != nil { + return err + } + + schema, err = client.GetXdbcTypeInfoSchema(ctx) + if err != nil { + return err + } + if 
err = m.validateSchema(schema_ref.XdbcTypeInfo, schema); err != nil { + return err + } + + info, err = client.GetSqlInfo(ctx, []flightsql.SqlInfo{flightsql.SqlInfoFlightSqlServerName, flightsql.SqlInfoFlightSqlServerReadOnly}) + if err != nil { + return err + } + if err = m.validate(schema_ref.SqlInfo, info, client); err != nil { + return err + } + + schema, err = client.GetSqlInfoSchema(ctx) + if err != nil { + return err + } + if err = m.validateSchema(schema_ref.SqlInfo, schema); err != nil { + return err + } + + return nil +} + +func (m *flightSqlScenarioTester) ValidateStatementExecution(client *flightsql.Client) error { + ctx := context.Background() + info, err := client.Execute(ctx, "SELECT STATEMENT") + if err != nil { + return err + } + if err = m.validate(QuerySchema, info, client); err != nil { + return err + } + + schema, err := client.GetExecuteSchema(ctx, "SELECT STATEMENT") + if err != nil { + return err + } + if err = m.validateSchema(QuerySchema, schema); err != nil { + return err + } + + updateResult, err := client.ExecuteUpdate(ctx, "UPDATE STATEMENT") + if err != nil { + return err + } + if updateResult != updateStatementExpectedRows { + return fmt.Errorf("expected 'UPDATE STATEMENT' return %d got %d", updateStatementExpectedRows, updateResult) + } + return nil +} + +func (m *flightSqlScenarioTester) ValidatePreparedStatementExecution(client *flightsql.Client) error { + ctx := context.Background() + prepared, err := client.Prepare(ctx, memory.DefaultAllocator, "SELECT PREPARED STATEMENT") + if err != nil { + return err + } + + arr, _, _ := array.FromJSON(memory.DefaultAllocator, arrow.PrimitiveTypes.Int64, strings.NewReader("[1]")) + defer arr.Release() + params := array.NewRecord(QuerySchema, []arrow.Array{arr}, 1) + prepared.SetParameters(params) + + info, err := prepared.Execute(ctx) + if err != nil { + return err + } + if err = m.validate(QuerySchema, info, client); err != nil { + return err + } + schema, err := prepared.GetSchema(ctx) + if err != nil { + return err + } + if err = m.validateSchema(QuerySchema, schema); err != nil { + return err + } + + if err = prepared.Close(ctx); err != nil { + return err + } + + updatePrepared, err := client.Prepare(ctx, memory.DefaultAllocator, "UPDATE PREPARED STATEMENT") + if err != nil { + return err + } + updateResult, err := updatePrepared.ExecuteUpdate(ctx) + if err != nil { + return err + } + + if updateResult != updatePreparedStatementExpectedRows { + return fmt.Errorf("expected 'UPDATE STATEMENT' return %d got %d", updatePreparedStatementExpectedRows, updateResult) + } + return updatePrepared.Close(ctx) +} + +func (m *flightSqlScenarioTester) doGetForTestCase(schema *arrow.Schema) chan flight.StreamChunk { + ch := make(chan flight.StreamChunk) + close(ch) + return ch +} + +func (m *flightSqlScenarioTester) GetFlightInfoStatement(ctx context.Context, cmd flightsql.StatementQuery, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + if err := assertEq("SELECT STATEMENT", cmd.GetQuery()); err != nil { + return nil, err + } + + handle, err := flightsql.CreateStatementQueryTicket([]byte("SELECT STATEMENT HANDLE")) + if err != nil { + return nil, err + } + + return &flight.FlightInfo{ + Endpoint: []*flight.FlightEndpoint{ + {Ticket: &flight.Ticket{Ticket: handle}}, + }, + Schema: flight.SerializeSchema(QuerySchema, memory.DefaultAllocator), + FlightDescriptor: desc, + TotalRecords: -1, + TotalBytes: -1, + }, nil +} + +func (m *flightSqlScenarioTester) GetSchemaStatement(ctx context.Context, cmd 
flightsql.StatementQuery, desc *flight.FlightDescriptor) (*flight.SchemaResult, error) { + return &flight.SchemaResult{Schema: flight.SerializeSchema(QuerySchema, memory.DefaultAllocator)}, nil +} + +func (m *flightSqlScenarioTester) DoGetStatement(ctx context.Context, cmd flightsql.StatementQueryTicket) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return QuerySchema, m.doGetForTestCase(QuerySchema), nil +} + +func (m *flightSqlScenarioTester) GetFlightInfoPreparedStatement(_ context.Context, cmd flightsql.PreparedStatementQuery, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + err := assertEq([]byte("SELECT PREPARED STATEMENT HANDLE"), cmd.GetPreparedStatementHandle()) + if err != nil { + return nil, err + } + return m.flightInfoForCommand(desc, QuerySchema), nil +} + +func (m *flightSqlScenarioTester) GetSchemaPreparedStatement(ctx context.Context, cmd flightsql.PreparedStatementQuery, desc *flight.FlightDescriptor) (*flight.SchemaResult, error) { + return &flight.SchemaResult{Schema: flight.SerializeSchema(QuerySchema, memory.DefaultAllocator)}, nil +} + +func (m *flightSqlScenarioTester) DoGetPreparedStatement(_ context.Context, cmd flightsql.PreparedStatementQuery) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return QuerySchema, m.doGetForTestCase(QuerySchema), nil +} + +func (m *flightSqlScenarioTester) GetFlightInfoCatalogs(_ context.Context, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return m.flightInfoForCommand(desc, schema_ref.Catalogs), nil +} + +func (m *flightSqlScenarioTester) DoGetCatalogs(_ context.Context) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return schema_ref.Catalogs, m.doGetForTestCase(schema_ref.Catalogs), nil +} + +func (m *flightSqlScenarioTester) GetFlightInfoXdbcTypeInfo(_ context.Context, cmd flightsql.GetXdbcTypeInfo, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return m.flightInfoForCommand(desc, schema_ref.XdbcTypeInfo), nil +} + +func (m *flightSqlScenarioTester) DoGetXdbcTypeInfo(context.Context, flightsql.GetXdbcTypeInfo) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return schema_ref.XdbcTypeInfo, m.doGetForTestCase(schema_ref.XdbcTypeInfo), nil +} + +func (m *flightSqlScenarioTester) GetFlightInfoSqlInfo(_ context.Context, cmd flightsql.GetSqlInfo, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + if err := assertEq(int(2), len(cmd.GetInfo())); err != nil { + return nil, err + } + if err := assertEq(flightsql.SqlInfoFlightSqlServerName, flightsql.SqlInfo(cmd.GetInfo()[0])); err != nil { + return nil, err + } + if err := assertEq(flightsql.SqlInfoFlightSqlServerReadOnly, flightsql.SqlInfo(cmd.GetInfo()[1])); err != nil { + return nil, err + } + + return m.flightInfoForCommand(desc, schema_ref.SqlInfo), nil +} + +func (m *flightSqlScenarioTester) DoGetSqlInfo(context.Context, flightsql.GetSqlInfo) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return schema_ref.SqlInfo, m.doGetForTestCase(schema_ref.SqlInfo), nil +} + +func (m *flightSqlScenarioTester) GetFlightInfoSchemas(_ context.Context, cmd flightsql.GetDBSchemas, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + if err := assertEq("catalog", cmd.GetCatalog()); err != nil { + return nil, err + } + + if err := assertEq("db_schema_filter_pattern", cmd.GetDBSchemaFilterPattern()); err != nil { + return nil, err + } + + return m.flightInfoForCommand(desc, schema_ref.DBSchemas), nil +} + +func (m *flightSqlScenarioTester) DoGetDBSchemas(context.Context, flightsql.GetDBSchemas) 
(*arrow.Schema, <-chan flight.StreamChunk, error) { + return schema_ref.DBSchemas, m.doGetForTestCase(schema_ref.DBSchemas), nil +} + +func (m *flightSqlScenarioTester) GetFlightInfoTables(_ context.Context, cmd flightsql.GetTables, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + if err := assertEq("catalog", cmd.GetCatalog()); err != nil { + return nil, err + } + + if err := assertEq("db_schema_filter_pattern", cmd.GetDBSchemaFilterPattern()); err != nil { + return nil, err + } + + if err := assertEq("table_filter_pattern", cmd.GetTableNameFilterPattern()); err != nil { + return nil, err + } + + if err := assertEq(int(2), len(cmd.GetTableTypes())); err != nil { + return nil, err + } + + if err := assertEq("table", cmd.GetTableTypes()[0]); err != nil { + return nil, err + } + + if err := assertEq("view", cmd.GetTableTypes()[1]); err != nil { + return nil, err + } + + if err := assertEq(true, cmd.GetIncludeSchema()); err != nil { + return nil, err + } + + return m.flightInfoForCommand(desc, schema_ref.TablesWithIncludedSchema), nil +} + +func (m *flightSqlScenarioTester) DoGetTables(context.Context, flightsql.GetTables) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return schema_ref.TablesWithIncludedSchema, m.doGetForTestCase(schema_ref.TablesWithIncludedSchema), nil +} + +func (m *flightSqlScenarioTester) GetFlightInfoTableTypes(_ context.Context, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + return m.flightInfoForCommand(desc, schema_ref.TableTypes), nil +} + +func (m *flightSqlScenarioTester) DoGetTableTypes(context.Context) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return schema_ref.TableTypes, m.doGetForTestCase(schema_ref.TableTypes), nil +} + +func (m *flightSqlScenarioTester) GetFlightInfoPrimaryKeys(_ context.Context, cmd flightsql.TableRef, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + if err := assertEq("catalog", cmd.Catalog); err != nil { + return nil, err + } + + if err := assertEq("db_schema", cmd.DBSchema); err != nil { + return nil, err + } + + if err := assertEq("table", cmd.Table); err != nil { + return nil, err + } + + return m.flightInfoForCommand(desc, schema_ref.PrimaryKeys), nil +} + +func (m *flightSqlScenarioTester) DoGetPrimaryKeys(context.Context, flightsql.TableRef) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return schema_ref.PrimaryKeys, m.doGetForTestCase(schema_ref.PrimaryKeys), nil +} + +func (m *flightSqlScenarioTester) GetFlightInfoExportedKeys(_ context.Context, cmd flightsql.TableRef, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + if err := assertEq("catalog", cmd.Catalog); err != nil { + return nil, err + } + + if err := assertEq("db_schema", cmd.DBSchema); err != nil { + return nil, err + } + + if err := assertEq("table", cmd.Table); err != nil { + return nil, err + } + + return m.flightInfoForCommand(desc, schema_ref.ExportedKeys), nil +} + +func (m *flightSqlScenarioTester) DoGetExportedKeys(context.Context, flightsql.TableRef) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return schema_ref.ExportedKeys, m.doGetForTestCase(schema_ref.ExportedKeys), nil +} + +func (m *flightSqlScenarioTester) GetFlightInfoImportedKeys(_ context.Context, cmd flightsql.TableRef, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + if err := assertEq("catalog", cmd.Catalog); err != nil { + return nil, err + } + + if err := assertEq("db_schema", cmd.DBSchema); err != nil { + return nil, err + } + + if err := assertEq("table", cmd.Table); err != nil { + return 
nil, err + } + + return m.flightInfoForCommand(desc, schema_ref.ImportedKeys), nil +} + +func (m *flightSqlScenarioTester) DoGetImportedKeys(context.Context, flightsql.TableRef) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return schema_ref.ImportedKeys, m.doGetForTestCase(schema_ref.ImportedKeys), nil +} + +func (m *flightSqlScenarioTester) GetFlightInfoCrossReference(_ context.Context, cmd flightsql.CrossTableRef, desc *flight.FlightDescriptor) (*flight.FlightInfo, error) { + if err := assertEq("pk_catalog", cmd.PKRef.Catalog); err != nil { + return nil, err + } + + if err := assertEq("pk_db_schema", cmd.PKRef.DBSchema); err != nil { + return nil, err + } + + if err := assertEq("pk_table", cmd.PKRef.Table); err != nil { + return nil, err + } + + if err := assertEq("fk_catalog", cmd.FKRef.Catalog); err != nil { + return nil, err + } + + if err := assertEq("fk_db_schema", cmd.FKRef.DBSchema); err != nil { + return nil, err + } + + if err := assertEq("fk_table", cmd.FKRef.Table); err != nil { + return nil, err + } + + return m.flightInfoForCommand(desc, schema_ref.TableTypes), nil +} + +func (m *flightSqlScenarioTester) DoGetCrossReference(context.Context, flightsql.CrossTableRef) (*arrow.Schema, <-chan flight.StreamChunk, error) { + return schema_ref.CrossReference, m.doGetForTestCase(schema_ref.CrossReference), nil +} + +func (m *flightSqlScenarioTester) DoPutCommandStatementUpdate(_ context.Context, cmd flightsql.StatementUpdate) (int64, error) { + if err := assertEq("UPDATE STATEMENT", cmd.GetQuery()); err != nil { + return 0, err + } + + return updateStatementExpectedRows, nil +} + +func (m *flightSqlScenarioTester) CreatePreparedStatement(_ context.Context, request flightsql.ActionCreatePreparedStatementRequest) (res flightsql.ActionCreatePreparedStatementResult, err error) { + err = assertEq(true, request.GetQuery() == "SELECT PREPARED STATEMENT" || request.GetQuery() == "UPDATE PREPARED STATEMENT") + if err != nil { + return + } + + res.Handle = []byte(request.GetQuery() + " HANDLE") + return +} + +func (m *flightSqlScenarioTester) ClosePreparedStatement(context.Context, flightsql.ActionClosePreparedStatementRequest) error { + return nil +} + +func (m *flightSqlScenarioTester) DoPutPreparedStatementQuery(_ context.Context, cmd flightsql.PreparedStatementQuery, rdr flight.MessageReader, _ flight.MetadataWriter) error { + err := assertEq([]byte("SELECT PREPARED STATEMENT HANDLE"), cmd.GetPreparedStatementHandle()) + if err != nil { + return err + } + + actualSchema := rdr.Schema() + if err = assertEq(true, actualSchema.Equal(QuerySchema)); err != nil { + return err + } + + return nil +} + +func (m *flightSqlScenarioTester) DoPutPreparedStatementUpdate(_ context.Context, cmd flightsql.PreparedStatementUpdate, _ flight.MessageReader) (int64, error) { + err := assertEq([]byte("UPDATE PREPARED STATEMENT HANDLE"), cmd.GetPreparedStatementHandle()) + if err != nil { + return 0, err + } + + return updatePreparedStatementExpectedRows, nil +} diff --git a/go/arrow/internal/testing/gen/random_array_gen.go b/go/arrow/internal/testing/gen/random_array_gen.go index 3fda662bf3a7e..b99b52534c13a 100644 --- a/go/arrow/internal/testing/gen/random_array_gen.go +++ b/go/arrow/internal/testing/gen/random_array_gen.go @@ -17,10 +17,10 @@ package gen import ( - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + 
"github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" "golang.org/x/exp/rand" "gonum.org/v1/gonum/stat/distuv" ) @@ -305,3 +305,33 @@ func (r *RandomArrayGenerator) String(size int64, minLength, maxLength int, null return bldr.NewArray() } + +func (r *RandomArrayGenerator) LargeString(size int64, minLength, maxLength int64, nullprob float64) arrow.Array { + lengths := r.Int64(size, minLength, maxLength, nullprob).(*array.Int64) + defer lengths.Release() + + bldr := array.NewLargeStringBuilder(r.mem) + defer bldr.Release() + + r.extra++ + dist := rand.New(rand.NewSource(r.seed + r.extra)) + + buf := make([]byte, 0, maxLength) + gen := func(n int64) string { + out := buf[:n] + for i := range out { + out[i] = uint8(dist.Int63n(int64('z')-int64('A')+1) + int64('A')) + } + return string(out) + } + + for i := 0; i < lengths.Len(); i++ { + if lengths.IsValid(i) { + bldr.Append(gen(lengths.Value(i))) + } else { + bldr.AppendNull() + } + } + + return bldr.NewArray() +} diff --git a/go/arrow/internal/testing/tools/bits_test.go b/go/arrow/internal/testing/tools/bits_test.go index 81a290ea293da..fac20b043d079 100644 --- a/go/arrow/internal/testing/tools/bits_test.go +++ b/go/arrow/internal/testing/tools/bits_test.go @@ -20,7 +20,7 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v9/arrow/internal/testing/tools" + "github.com/apache/arrow/go/v10/arrow/internal/testing/tools" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/internal/testing/types/extension_types.go b/go/arrow/internal/testing/types/extension_types.go index c266de169e10b..0e75806634184 100644 --- a/go/arrow/internal/testing/types/extension_types.go +++ b/go/arrow/internal/testing/types/extension_types.go @@ -22,8 +22,8 @@ import ( "fmt" "reflect" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" "golang.org/x/xerrors" ) @@ -273,15 +273,51 @@ func (p *DictExtensionType) Deserialize(storage arrow.DataType, data string) (ar return NewDictExtensionType(), nil } +// SmallintArray is an int16 array +type SmallintArray struct { + array.ExtensionArrayBase +} + +type SmallintType struct { + arrow.ExtensionBase +} + +func NewSmallintType() *SmallintType { + return &SmallintType{ExtensionBase: arrow.ExtensionBase{ + Storage: arrow.PrimitiveTypes.Int16}} +} + +func (SmallintType) ArrayType() reflect.Type { return reflect.TypeOf(SmallintArray{}) } + +func (SmallintType) ExtensionName() string { return "smallint" } + +func (SmallintType) Serialize() string { return "smallint" } + +func (s *SmallintType) ExtensionEquals(other arrow.ExtensionType) bool { + return s.Name() == other.Name() +} + +func (SmallintType) Deserialize(storageType arrow.DataType, data string) (arrow.ExtensionType, error) { + if data != "smallint" { + return nil, fmt.Errorf("type identifier did not match: '%s'", data) + } + if !arrow.TypeEqual(storageType, arrow.PrimitiveTypes.Int16) { + return nil, fmt.Errorf("invalid storage type for SmallintType: %s", storageType) + } + return NewSmallintType(), nil +} + var ( _ arrow.ExtensionType = (*UUIDType)(nil) _ arrow.ExtensionType = (*Parametric1Type)(nil) _ arrow.ExtensionType = (*Parametric2Type)(nil) _ arrow.ExtensionType = (*ExtStructType)(nil) _ arrow.ExtensionType = (*DictExtensionType)(nil) + _ arrow.ExtensionType = (*SmallintType)(nil) _ array.ExtensionArray = (*UUIDArray)(nil) _ array.ExtensionArray 
= (*Parametric1Array)(nil) _ array.ExtensionArray = (*Parametric2Array)(nil) _ array.ExtensionArray = (*ExtStructArray)(nil) _ array.ExtensionArray = (*DictExtensionArray)(nil) + _ array.ExtensionArray = (*SmallintArray)(nil) ) diff --git a/go/arrow/internal/utils.go b/go/arrow/internal/utils.go new file mode 100644 index 0000000000000..cdaad6b91853f --- /dev/null +++ b/go/arrow/internal/utils.go @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package internal + +import ( + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/internal/flatbuf" +) + +const CurMetadataVersion = flatbuf.MetadataVersionV5 + +// DefaultHasValidityBitmap is a convenience function equivalent to +// calling HasValidityBitmap with CurMetadataVersion. +func DefaultHasValidityBitmap(id arrow.Type) bool { return HasValidityBitmap(id, CurMetadataVersion) } + +// HasValidityBitmap returns whether the given type at the provided version is +// expected to have a validity bitmap in its representation. +// +// Typically this is necessary because of the change between V4 and V5 +// where union types no longer have validity bitmaps.
+func HasValidityBitmap(id arrow.Type, version flatbuf.MetadataVersion) bool { + // in <=V4 Null types had no validity bitmap + // in >=V5 Null and Union types have no validity bitmap + if version < flatbuf.MetadataVersionV5 { + return id != arrow.NULL + } + + switch id { + case arrow.NULL, arrow.DENSE_UNION, arrow.SPARSE_UNION: + return false + } + return true +} diff --git a/go/arrow/ipc/cmd/arrow-cat/main.go b/go/arrow/ipc/cmd/arrow-cat/main.go index b99ab37ac3ee1..0df83a30de42c 100644 --- a/go/arrow/ipc/cmd/arrow-cat/main.go +++ b/go/arrow/ipc/cmd/arrow-cat/main.go @@ -63,8 +63,8 @@ import ( "log" "os" - "github.com/apache/arrow/go/v9/arrow/ipc" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/ipc" + "github.com/apache/arrow/go/v10/arrow/memory" ) func main() { @@ -91,7 +91,7 @@ func processStream(w io.Writer, rin io.Reader) error { r, err := ipc.NewReader(rin, ipc.WithAllocator(mem)) if err != nil { if errors.Is(err, io.EOF) { - return nil + break } return err } diff --git a/go/arrow/ipc/cmd/arrow-cat/main_test.go b/go/arrow/ipc/cmd/arrow-cat/main_test.go index 18383d05b3eab..e9cc59c5dcf86 100644 --- a/go/arrow/ipc/cmd/arrow-cat/main_test.go +++ b/go/arrow/ipc/cmd/arrow-cat/main_test.go @@ -24,18 +24,14 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/internal/arrdata" - "github.com/apache/arrow/go/v9/arrow/ipc" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/internal/arrdata" + "github.com/apache/arrow/go/v10/arrow/ipc" + "github.com/apache/arrow/go/v10/arrow/memory" ) func TestCatStream(t *testing.T) { - tempDir, err := ioutil.TempDir("", "go-arrow-cat-stream-") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(tempDir) + tempDir := t.TempDir() for _, tc := range []struct { name string @@ -229,11 +225,7 @@ record 3... 
} func TestCatFile(t *testing.T) { - tempDir, err := ioutil.TempDir("", "go-arrow-cat-file-") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(tempDir) + tempDir := t.TempDir() for _, tc := range []struct { name string diff --git a/go/arrow/ipc/cmd/arrow-file-to-stream/main.go b/go/arrow/ipc/cmd/arrow-file-to-stream/main.go index 1a9d24e1f5eca..4559d6faff68a 100644 --- a/go/arrow/ipc/cmd/arrow-file-to-stream/main.go +++ b/go/arrow/ipc/cmd/arrow-file-to-stream/main.go @@ -24,9 +24,9 @@ import ( "log" "os" - "github.com/apache/arrow/go/v9/arrow/arrio" - "github.com/apache/arrow/go/v9/arrow/ipc" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/arrio" + "github.com/apache/arrow/go/v10/arrow/ipc" + "github.com/apache/arrow/go/v10/arrow/memory" ) func main() { diff --git a/go/arrow/ipc/cmd/arrow-file-to-stream/main_test.go b/go/arrow/ipc/cmd/arrow-file-to-stream/main_test.go index be8a5729abe1a..57f0bc5d990d0 100644 --- a/go/arrow/ipc/cmd/arrow-file-to-stream/main_test.go +++ b/go/arrow/ipc/cmd/arrow-file-to-stream/main_test.go @@ -19,19 +19,14 @@ package main import ( "io" "io/ioutil" - "os" "testing" - "github.com/apache/arrow/go/v9/arrow/internal/arrdata" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/internal/arrdata" + "github.com/apache/arrow/go/v10/arrow/memory" ) func TestFileToStream(t *testing.T) { - tempDir, err := ioutil.TempDir("", "go-arrow-file-to-stream-") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(tempDir) + tempDir := t.TempDir() for name, recs := range arrdata.Records { t.Run(name, func(t *testing.T) { diff --git a/go/arrow/ipc/cmd/arrow-json-integration-test/main.go b/go/arrow/ipc/cmd/arrow-json-integration-test/main.go index c61a2ccfae38e..e68acb6c46289 100644 --- a/go/arrow/ipc/cmd/arrow-json-integration-test/main.go +++ b/go/arrow/ipc/cmd/arrow-json-integration-test/main.go @@ -22,12 +22,12 @@ import ( "log" "os" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/arrio" - "github.com/apache/arrow/go/v9/arrow/internal/arrjson" - "github.com/apache/arrow/go/v9/arrow/internal/testing/types" - "github.com/apache/arrow/go/v9/arrow/ipc" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/arrio" + "github.com/apache/arrow/go/v10/arrow/internal/arrjson" + "github.com/apache/arrow/go/v10/arrow/internal/testing/types" + "github.com/apache/arrow/go/v10/arrow/ipc" ) func main() { @@ -199,7 +199,7 @@ func validate(arrowName, jsonName string, verbose bool) error { if !arr.Schema().Equal(jrr.Schema()) { if verbose { - log.Printf("JSON schema:\n%v\nArrow schema:\n%v", arr.Schema(), jrr.Schema()) + log.Printf("JSON schema:\n%v\nArrow schema:\n%v", jrr.Schema(), arr.Schema()) } return fmt.Errorf("schemas did not match") } diff --git a/go/arrow/ipc/cmd/arrow-json-integration-test/main_test.go b/go/arrow/ipc/cmd/arrow-json-integration-test/main_test.go index 852eae6346d25..2cf1356b123f9 100644 --- a/go/arrow/ipc/cmd/arrow-json-integration-test/main_test.go +++ b/go/arrow/ipc/cmd/arrow-json-integration-test/main_test.go @@ -21,16 +21,12 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v9/arrow/internal/arrdata" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/internal/arrdata" + "github.com/apache/arrow/go/v10/arrow/memory" ) func TestIntegration(t *testing.T) { - tempDir, err := ioutil.TempDir("", 
"go-arrow-integration-") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(tempDir) + tempDir := t.TempDir() const verbose = true for name, recs := range arrdata.Records { diff --git a/go/arrow/ipc/cmd/arrow-ls/main.go b/go/arrow/ipc/cmd/arrow-ls/main.go index 8ecda5836609b..27f8ad5eae150 100644 --- a/go/arrow/ipc/cmd/arrow-ls/main.go +++ b/go/arrow/ipc/cmd/arrow-ls/main.go @@ -61,8 +61,8 @@ import ( "log" "os" - "github.com/apache/arrow/go/v9/arrow/ipc" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/ipc" + "github.com/apache/arrow/go/v10/arrow/memory" ) func main() { diff --git a/go/arrow/ipc/cmd/arrow-ls/main_test.go b/go/arrow/ipc/cmd/arrow-ls/main_test.go index b671d76bf8150..6fb53b8463b47 100644 --- a/go/arrow/ipc/cmd/arrow-ls/main_test.go +++ b/go/arrow/ipc/cmd/arrow-ls/main_test.go @@ -24,18 +24,14 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/internal/arrdata" - "github.com/apache/arrow/go/v9/arrow/ipc" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/internal/arrdata" + "github.com/apache/arrow/go/v10/arrow/ipc" + "github.com/apache/arrow/go/v10/arrow/memory" ) func TestLsStream(t *testing.T) { - tempDir, err := ioutil.TempDir("", "go-arrow-ls-stream-") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(tempDir) + tempDir := t.TempDir() for _, tc := range []struct { name string @@ -174,11 +170,7 @@ records: 3 } func TestLsFile(t *testing.T) { - tempDir, err := ioutil.TempDir("", "go-arrow-ls-file-") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(tempDir) + tempDir := t.TempDir() for _, tc := range []struct { stream bool diff --git a/go/arrow/ipc/cmd/arrow-stream-to-file/main.go b/go/arrow/ipc/cmd/arrow-stream-to-file/main.go index 3bc327b5b5d25..1a3d98f819c0f 100644 --- a/go/arrow/ipc/cmd/arrow-stream-to-file/main.go +++ b/go/arrow/ipc/cmd/arrow-stream-to-file/main.go @@ -24,9 +24,9 @@ import ( "log" "os" - "github.com/apache/arrow/go/v9/arrow/arrio" - "github.com/apache/arrow/go/v9/arrow/ipc" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/arrio" + "github.com/apache/arrow/go/v10/arrow/ipc" + "github.com/apache/arrow/go/v10/arrow/memory" ) func main() { diff --git a/go/arrow/ipc/cmd/arrow-stream-to-file/main_test.go b/go/arrow/ipc/cmd/arrow-stream-to-file/main_test.go index 55831cce5e97c..667d193153c2f 100644 --- a/go/arrow/ipc/cmd/arrow-stream-to-file/main_test.go +++ b/go/arrow/ipc/cmd/arrow-stream-to-file/main_test.go @@ -19,19 +19,14 @@ package main import ( "io" "io/ioutil" - "os" "testing" - "github.com/apache/arrow/go/v9/arrow/internal/arrdata" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/internal/arrdata" + "github.com/apache/arrow/go/v10/arrow/memory" ) func TestStreamToFile(t *testing.T) { - tempDir, err := ioutil.TempDir("", "go-arrow-stream-to-file-") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(tempDir) + tempDir := t.TempDir() for name, recs := range arrdata.Records { t.Run(name, func(t *testing.T) { @@ -60,6 +55,7 @@ func TestStreamToFile(t *testing.T) { if err != nil { t.Fatal(err) } + defer o.Close() err = processStream(o, f) if err != nil { diff --git a/go/arrow/ipc/compression.go b/go/arrow/ipc/compression.go index e62ecc90f217e..a8c2b61967160 100644 --- a/go/arrow/ipc/compression.go +++ b/go/arrow/ipc/compression.go @@ -19,8 +19,8 @@ package ipc import ( "io" - 
"github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/internal/flatbuf" "github.com/klauspost/compress/zstd" "github.com/pierrec/lz4/v4" ) diff --git a/go/arrow/ipc/endian_swap.go b/go/arrow/ipc/endian_swap.go new file mode 100644 index 0000000000000..a61b653557655 --- /dev/null +++ b/go/arrow/ipc/endian_swap.go @@ -0,0 +1,162 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ipc + +import ( + "errors" + "math/bits" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" +) + +// swap the endianness of the array's buffers as needed in-place to save +// the cost of reallocation. +// +// assumes that nested data buffers are never re-used, if an *array.Data +// child is re-used among the children or the dictionary then this might +// end up double-swapping (putting it back into the original endianness). +// if it is needed to support re-using the buffers, then this can be +// re-factored to instead return a NEW array.Data object with newly +// allocated buffers, rather than doing it in place. +// +// For now this is intended to be used by the IPC readers after loading +// arrays from an IPC message which currently is guaranteed to not re-use +// buffers between arrays. 
+func swapEndianArrayData(data *array.Data) error { + if data.Offset() != 0 { + return errors.New("unsupported data format: data.offset != 0") + } + if err := swapType(data.DataType(), data); err != nil { + return err + } + return swapChildren(data.Children()) +} + +func swapChildren(children []arrow.ArrayData) (err error) { + for i := range children { + if err = swapEndianArrayData(children[i].(*array.Data)); err != nil { + break + } + } + return +} + +func swapType(dt arrow.DataType, data *array.Data) (err error) { + switch dt.ID() { + case arrow.BINARY, arrow.STRING: + swapOffsets(1, 32, data) + return + case arrow.LARGE_BINARY, arrow.LARGE_STRING: + swapOffsets(1, 64, data) + return + case arrow.NULL, arrow.BOOL, arrow.INT8, arrow.UINT8, + arrow.FIXED_SIZE_BINARY, arrow.FIXED_SIZE_LIST, arrow.STRUCT: + return + } + + switch dt := dt.(type) { + case *arrow.Decimal128Type: + rawdata := arrow.Uint64Traits.CastFromBytes(data.Buffers()[1].Bytes()) + length := data.Buffers()[1].Len() / arrow.Decimal128SizeBytes + for i := 0; i < length; i++ { + idx := i * 2 + tmp := bits.ReverseBytes64(rawdata[idx]) + rawdata[idx] = bits.ReverseBytes64(rawdata[idx+1]) + rawdata[idx+1] = tmp + } + case *arrow.Decimal256Type: + rawdata := arrow.Uint64Traits.CastFromBytes(data.Buffers()[1].Bytes()) + length := data.Buffers()[1].Len() / arrow.Decimal256SizeBytes + for i := 0; i < length; i++ { + idx := i * 4 + tmp0 := bits.ReverseBytes64(rawdata[idx]) + tmp1 := bits.ReverseBytes64(rawdata[idx+1]) + tmp2 := bits.ReverseBytes64(rawdata[idx+2]) + rawdata[idx] = bits.ReverseBytes64(rawdata[idx+3]) + rawdata[idx+1] = tmp2 + rawdata[idx+2] = tmp1 + rawdata[idx+3] = tmp0 + } + case arrow.UnionType: + if dt.Mode() == arrow.DenseMode { + swapOffsets(2, 32, data) + } + case *arrow.ListType: + swapOffsets(1, 32, data) + case *arrow.LargeListType: + swapOffsets(1, 64, data) + case *arrow.MapType: + swapOffsets(1, 32, data) + case *arrow.DayTimeIntervalType: + byteSwapBuffer(32, data.Buffers()[1]) + case *arrow.MonthDayNanoIntervalType: + rawdata := arrow.MonthDayNanoIntervalTraits.CastFromBytes(data.Buffers()[1].Bytes()) + for i, tmp := range rawdata { + rawdata[i].Days = int32(bits.ReverseBytes32(uint32(tmp.Days))) + rawdata[i].Months = int32(bits.ReverseBytes32(uint32(tmp.Months))) + rawdata[i].Nanoseconds = int64(bits.ReverseBytes64(uint64(tmp.Nanoseconds))) + } + case arrow.ExtensionType: + return swapType(dt.StorageType(), data) + case *arrow.DictionaryType: + // dictionary itself was already swapped in ReadDictionary calls + return swapType(dt.IndexType, data) + case arrow.FixedWidthDataType: + byteSwapBuffer(dt.BitWidth(), data.Buffers()[1]) + } + return +} + +// this can get called on an invalid Array Data object by the IPC reader, +// so we won't rely on the data.length and will instead rely on the buffer's +// own size instead. 
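The Decimal128 branch above reverses both 64-bit halves of each value and exchanges them, which together reverse all 16 bytes. A minimal, stdlib-only sketch of that transform on plain uint64 words (hypothetical helper name, not the Arrow code itself):

```go
// Each 128-bit value occupies two native-endian uint64 words; byte-reverse
// each word and swap the pair to flip the whole value's endianness.
package main

import (
	"fmt"
	"math/bits"
)

func swapDecimal128Words(words []uint64) {
	for i := 0; i+1 < len(words); i += 2 {
		lo := bits.ReverseBytes64(words[i])
		words[i] = bits.ReverseBytes64(words[i+1])
		words[i+1] = lo
	}
}

func main() {
	words := []uint64{0x0807060504030201, 0x100f0e0d0c0b0a09}
	swapDecimal128Words(words)
	fmt.Printf("%016x %016x\n", words[0], words[1])
	// 090a0b0c0d0e0f10 0102030405060708
}
```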
+func byteSwapBuffer(bw int, buf *memory.Buffer) { + if bw == 1 || buf == nil { + // if byte width == 1, no need to swap anything + return + } + + switch bw { + case 16: + data := arrow.Uint16Traits.CastFromBytes(buf.Bytes()) + for i := range data { + data[i] = bits.ReverseBytes16(data[i]) + } + case 32: + data := arrow.Uint32Traits.CastFromBytes(buf.Bytes()) + for i := range data { + data[i] = bits.ReverseBytes32(data[i]) + } + case 64: + data := arrow.Uint64Traits.CastFromBytes(buf.Bytes()) + for i := range data { + data[i] = bits.ReverseBytes64(data[i]) + } + } +} + +func swapOffsets(index int, bitWidth int, data *array.Data) { + if data.Buffers()[index] == nil || data.Buffers()[index].Len() == 0 { + return + } + + // other than unions, offset has one more element than the data.length + // don't yet implement large types, so hardcode 32bit offsets for now + byteSwapBuffer(bitWidth, data.Buffers()[index]) +} diff --git a/go/arrow/ipc/endian_swap_test.go b/go/arrow/ipc/endian_swap_test.go new file mode 100644 index 0000000000000..2eef6183a2c08 --- /dev/null +++ b/go/arrow/ipc/endian_swap_test.go @@ -0,0 +1,299 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
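swapOffsets above byte-swaps the value-offset vector of variable-length types, which has length+1 entries. For the three-element binary/string arrays used in the tests that follow, the offsets are [0, 4, 4, 6]; a stdlib-only sketch showing how those offsets look in little- vs big-endian bytes, matching the expected buffers in the tests:

```go
// Encode the value offsets of ["0123", null, "45"] under both byte orders;
// swapping the buffer converts one encoding into the other.
package main

import (
	"encoding/binary"
	"fmt"
)

func encodeOffsets(order binary.ByteOrder, offsets []int32) []byte {
	out := make([]byte, 4*len(offsets))
	for i, o := range offsets {
		order.PutUint32(out[4*i:], uint32(o))
	}
	return out
}

func main() {
	offsets := []int32{0, 4, 4, 6}
	fmt.Println(encodeOffsets(binary.LittleEndian, offsets)) // [0 0 0 0 4 0 0 0 4 0 0 0 6 0 0 0]
	fmt.Println(encodeOffsets(binary.BigEndian, offsets))    // [0 0 0 0 0 0 0 4 0 0 0 4 0 0 0 6]
}
```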
+ +package ipc + +import ( + "strings" + "testing" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/endian" + "github.com/apache/arrow/go/v10/arrow/internal/testing/types" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func AssertArrayDataEqualWithSwappedEndian(t *testing.T, target, expected arrow.ArrayData) bool { + assert.NoError(t, swapEndianArrayData(target.(*array.Data))) + swappedArray := array.MakeFromData(target) + defer swappedArray.Release() + + expectedArray := array.MakeFromData(expected) + defer expectedArray.Release() + + return assert.Truef(t, array.Equal(swappedArray, expectedArray), "got: %s, expected: %s\n", swappedArray, expectedArray) +} + +func TestSwapEndianPrimitiveArrayData(t *testing.T) { + nullBuffer := memory.NewBufferBytes([]byte("\xff")) + + tests := []struct { + dt arrow.DataType + len int + input, expected string + }{ + {arrow.Null, 0, "", ""}, + {arrow.PrimitiveTypes.Int32, 0, "", ""}, + {arrow.FixedWidthTypes.Boolean, 8, "01234567", "01234567"}, + {arrow.PrimitiveTypes.Int8, 8, "01234567", "01234567"}, + {arrow.PrimitiveTypes.Uint16, 4, "01234567", "10325476"}, + {arrow.PrimitiveTypes.Int32, 2, "01234567", "32107654"}, + {arrow.PrimitiveTypes.Uint64, 1, "01234567", "76543210"}, + {&arrow.Decimal128Type{Precision: 38, Scale: 10}, 1, "0123456789abcdef", "fedcba9876543210"}, + {arrow.PrimitiveTypes.Float32, 2, "01200560", "02100650"}, + {arrow.PrimitiveTypes.Float64, 1, "01200560", "06500210"}, + } + + for _, tt := range tests { + t.Run(tt.dt.String(), func(t *testing.T) { + var target, expected arrow.ArrayData + if tt.dt == arrow.Null { + target = array.NewData(arrow.Null, 0, []*memory.Buffer{nil}, nil, 0, 0) + expected = target + } else { + target = array.NewData(tt.dt, tt.len, []*memory.Buffer{nullBuffer, memory.NewBufferBytes([]byte(tt.input))}, nil, 0, 0) + expected = array.NewData(tt.dt, tt.len, []*memory.Buffer{nullBuffer, memory.NewBufferBytes([]byte(tt.expected))}, nil, 0, 0) + defer target.Release() + defer expected.Release() + } + AssertArrayDataEqualWithSwappedEndian(t, target, expected) + }) + } + + data := array.NewData(arrow.PrimitiveTypes.Int64, 1, []*memory.Buffer{nullBuffer, memory.NewBufferBytes([]byte("01234567"))}, nil, 0, 1) + assert.Error(t, swapEndianArrayData(data)) +} + +func replaceBuffer(data *array.Data, idx int, bufdata []byte) *array.Data { + out := data.Copy() + buffers := out.Buffers() + buffers[idx].Release() + buffers[idx] = memory.NewBufferBytes(bufdata) + return out +} + +func replaceBuffersInChild(data *array.Data, childIdx int, bufdata []byte) *array.Data { + out := data.Copy() + // assume updating only buffer[1] in child data + children := out.Children() + child := children[childIdx].(*array.Data).Copy() + children[childIdx].Release() + child.Buffers()[1].Release() + child.Buffers()[1] = memory.NewBufferBytes(bufdata) + children[childIdx] = child + + return out +} + +func replaceBuffersInDict(data *array.Data, bufferIdx int, bufdata []byte) *array.Data { + out := data.Copy() + dictData := out.Dictionary().(*array.Data).Copy() + dictData.Buffers()[bufferIdx].Release() + dictData.Buffers()[bufferIdx] = memory.NewBufferBytes(bufdata) + defer dictData.Release() + out.SetDictionary(dictData) + return out +} + +func TestSwapEndianArrayDataBinary(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(t, 0) + + // binary 
type assumes the json string data is base64 encoded + // MDEyMw== -> 0123 + // NDU= -> 45 + arr, _, err := array.FromJSON(mem, arrow.BinaryTypes.Binary, strings.NewReader(`["MDEyMw==", null, "NDU="]`)) + require.NoError(t, err) + defer arr.Release() + + var offsets []byte + if endian.IsBigEndian { + offsets = []byte{0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0} + } else { + offsets = []byte{0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 6} + } + expected := arr.Data().(*array.Data) + test := replaceBuffer(expected, 1, offsets) + defer test.Release() + AssertArrayDataEqualWithSwappedEndian(t, test, expected) +} + +func TestSwapEndianArrayString(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(t, 0) + + arr, _, err := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["ABCD", null, "EF"]`)) + require.NoError(t, err) + defer arr.Release() + + var offsets []byte + if endian.IsBigEndian { + offsets = []byte{0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0} + } else { + offsets = []byte{0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 6} + } + + expected := arr.Data().(*array.Data) + test := replaceBuffer(expected, 1, offsets) + defer test.Release() + AssertArrayDataEqualWithSwappedEndian(t, test, expected) +} + +func TestSwapEndianArrayListType(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(t, 0) + + dt := arrow.ListOf(arrow.PrimitiveTypes.Int32) + arr, _, err := array.FromJSON(mem, dt, strings.NewReader(`[[0, 1, 2, 3], null, [4, 5]]`)) + require.NoError(t, err) + defer arr.Release() + + var ( + offsets, data []byte + ) + if endian.IsBigEndian { + offsets = []byte{0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0} + data = []byte{0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0} + } else { + offsets = []byte{0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 6} + data = []byte{0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5} + } + + expected := arr.Data().(*array.Data) + test := replaceBuffer(expected, 1, offsets) + defer test.Release() + test = replaceBuffersInChild(test, 0, data) + defer test.Release() + + AssertArrayDataEqualWithSwappedEndian(t, test, expected) +} + +func TestSwapEndianArrayFixedSizeList(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(t, 0) + + dt := arrow.FixedSizeListOf(2, arrow.PrimitiveTypes.Int32) + arr, _, err := array.FromJSON(mem, dt, strings.NewReader(`[[0, 1], null, [2, 3]]`)) + require.NoError(t, err) + defer arr.Release() + + var data []byte + if endian.IsBigEndian { + data = []byte{0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} + } else { + data = []byte{0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3} + } + + expected := arr.Data().(*array.Data) + test := replaceBuffersInChild(expected, 0, data) + defer test.Release() + + AssertArrayDataEqualWithSwappedEndian(t, test, expected) +} + +func TestSwapEndianArrayDictType(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(t, 0) + + dt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: arrow.PrimitiveTypes.Int16} + dict, _, err := array.FromJSON(mem, dt.ValueType, strings.NewReader(`[4, 5, 6, 7]`)) + require.NoError(t, err) + defer dict.Release() + + indices, _, _ := array.FromJSON(mem, dt.IndexType, strings.NewReader("[0, 2, 3]")) + defer indices.Release() + + arr := array.NewDictionaryArray(dt, 
indices, dict) + defer arr.Release() + + var ( + data1, data2 []byte + ) + if endian.IsBigEndian { + data1 = []byte{0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} + data2 = []byte{4, 0, 5, 0, 6, 0, 7, 0} + } else { + data1 = []byte{0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3} + data2 = []byte{0, 4, 0, 5, 0, 6, 0, 7} + } + + expected := arr.Data().(*array.Data) + test := replaceBuffer(expected, 1, data1) + defer test.Release() + test = replaceBuffersInDict(test, 1, data2) + defer test.Release() + + // dictionary must be explicitly swapped! + assert.NoError(t, swapEndianArrayData(test.Dictionary().(*array.Data))) + AssertArrayDataEqualWithSwappedEndian(t, test, expected) +} + +func TestSwapEndianArrayStruct(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(t, 0) + + arr, _, err := array.FromJSON(mem, arrow.StructOf( + arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, + arrow.Field{Name: "b", Type: arrow.BinaryTypes.String, Nullable: true}, + ), strings.NewReader(`[{"a": 4, "b": null}, {"a": null, "b": "foo"}]`)) + require.NoError(t, err) + defer arr.Release() + + var data1, data2 []byte + if endian.IsBigEndian { + data1 = []byte{4, 0, 0, 0, 0, 0, 0, 0} + data2 = []byte{0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0} + } else { + data1 = []byte{0, 0, 0, 4, 0, 0, 0, 0} + data2 = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3} + } + + expected := arr.Data().(*array.Data) + test := replaceBuffersInChild(expected, 0, data1) + defer test.Release() + test = replaceBuffersInChild(test, 1, data2) + defer test.Release() + AssertArrayDataEqualWithSwappedEndian(t, test, expected) +} + +func TestSwapEndianArrayExtensionType(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(t, 0) + + arrInt16, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int16, strings.NewReader(`[0, 1, 2, 3]`)) + defer arrInt16.Release() + + extData := array.NewData(types.NewSmallintType(), arrInt16.Len(), arrInt16.Data().Buffers(), nil, 0, 0) + defer extData.Release() + + arr := array.MakeFromData(extData) + defer arr.Release() + + var data []byte + if endian.IsBigEndian { + data = []byte{0, 0, 1, 0, 2, 0, 3, 0} + } else { + data = []byte{0, 0, 0, 1, 0, 2, 0, 3} + } + + expected := arr.Data().(*array.Data) + test := replaceBuffer(expected, 1, data) + defer test.Release() + AssertArrayDataEqualWithSwappedEndian(t, test, expected) +} diff --git a/go/arrow/ipc/file_reader.go b/go/arrow/ipc/file_reader.go index ab6e7bf108faa..8a1a9b9940d50 100644 --- a/go/arrow/ipc/file_reader.go +++ b/go/arrow/ipc/file_reader.go @@ -23,12 +23,14 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/internal/dictutils" - "github.com/apache/arrow/go/v9/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/endian" + "github.com/apache/arrow/go/v10/arrow/internal" + "github.com/apache/arrow/go/v10/arrow/internal/dictutils" + "github.com/apache/arrow/go/v10/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v10/arrow/memory" ) // FileReader is an Arrow file reader. @@ -50,7 +52,8 @@ type FileReader struct { irec int // current record index. 
used for the arrio.Reader interface err error // last error - mem memory.Allocator + mem memory.Allocator + swapEndianness bool } // NewFileReader opens an Arrow file using the provided reader r. @@ -79,7 +82,7 @@ func NewFileReader(r ReadAtSeeker, opts ...Option) (*FileReader, error) { return nil, fmt.Errorf("arrow/ipc: could not decode footer: %w", err) } - err = f.readSchema() + err = f.readSchema(cfg.ensureNativeEndian) if err != nil { return nil, fmt.Errorf("arrow/ipc: could not decode schema: %w", err) } @@ -131,7 +134,7 @@ func (f *FileReader) readFooter() error { return err } -func (f *FileReader) readSchema() error { +func (f *FileReader) readSchema(ensureNativeEndian bool) error { var ( err error kind dictutils.Kind @@ -146,6 +149,11 @@ func (f *FileReader) readSchema() error { return fmt.Errorf("arrow/ipc: could not read schema: %w", err) } + if ensureNativeEndian && !f.schema.IsNativeEndian() { + f.swapEndianness = true + f.schema = f.schema.WithEndianness(endian.NativeEndian) + } + for i := 0; i < f.NumDictionaries(); i++ { blk, err := f.dict(i) if err != nil { @@ -165,7 +173,7 @@ func (f *FileReader) readSchema() error { return err } - kind, err = readDictionary(&f.memo, msg.meta, bytes.NewReader(msg.body.Bytes()), f.mem) + kind, err = readDictionary(&f.memo, msg.meta, bytes.NewReader(msg.body.Bytes()), f.swapEndianness, f.mem) if err != nil { return err } @@ -293,7 +301,7 @@ func (f *FileReader) RecordAt(i int) (arrow.Record, error) { return nil, fmt.Errorf("arrow/ipc: message %d is not a Record", i) } - return newRecord(f.schema, &f.memo, msg.meta, bytes.NewReader(msg.body.Bytes()), f.mem), nil + return newRecord(f.schema, &f.memo, msg.meta, bytes.NewReader(msg.body.Bytes()), f.swapEndianness, f.mem), nil } // Read reads the current record from the underlying stream and an error, if any. 
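With the swapEndianness flag wired through readSchema and newRecord above, a file produced on a machine of the opposite endianness is byte-swapped transparently on read. A hedged usage sketch of the file reader (the file name is hypothetical; WithEnsureNativeEndian is the option added later in this patch and already defaults to true):

```go
// Sketch: iterate the records of an Arrow IPC file; non-native buffers
// arrive swapped to host byte order unless the option is disabled.
package main

import (
	"log"
	"os"

	"github.com/apache/arrow/go/v10/arrow/ipc"
)

func main() {
	f, err := os.Open("records.arrow") // hypothetical Arrow IPC file
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	r, err := ipc.NewFileReader(f, ipc.WithEnsureNativeEndian(true))
	if err != nil {
		log.Fatal(err)
	}
	defer r.Close()

	for i := 0; i < r.NumRecords(); i++ {
		rec, err := r.RecordAt(i) // buffers are native-endian here if swapping was needed
		if err != nil {
			log.Fatal(err)
		}
		log.Printf("record %d: %d rows", i, rec.NumRows())
		rec.Release()
	}
}
```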
@@ -315,7 +323,7 @@ func (f *FileReader) ReadAt(i int64) (arrow.Record, error) { return f.Record(int(i)) } -func newRecord(schema *arrow.Schema, memo *dictutils.Memo, meta *memory.Buffer, body ReadAtSeeker, mem memory.Allocator) arrow.Record { +func newRecord(schema *arrow.Schema, memo *dictutils.Memo, meta *memory.Buffer, body ReadAtSeeker, swapEndianness bool, mem memory.Allocator) arrow.Record { var ( msg = flatbuf.GetRootAsMessage(meta.Bytes(), 0) md flatbuf.RecordBatch @@ -337,8 +345,9 @@ func newRecord(schema *arrow.Schema, memo *dictutils.Memo, meta *memory.Buffer, codec: codec, mem: mem, }, - memo: memo, - max: kMaxNestingDepth, + memo: memo, + max: kMaxNestingDepth, + version: MetadataVersion(msg.Version()), } pos := dictutils.NewFieldPos() @@ -351,6 +360,10 @@ func newRecord(schema *arrow.Schema, memo *dictutils.Memo, meta *memory.Buffer, panic(err) } + if swapEndianness { + swapEndianArrayData(data.(*array.Data)) + } + cols[i] = array.MakeFromData(data) defer cols[i].Release() } @@ -368,8 +381,9 @@ type ipcSource struct { func (src *ipcSource) buffer(i int) *memory.Buffer { var buf flatbuf.Buffer if !src.meta.Buffers(&buf, i) { - panic("buffer index out of bound") + panic("arrow/ipc: buffer index out of bound") } + if buf.Length() == 0 { return memory.NewBufferBytes(nil) } @@ -411,7 +425,7 @@ func (src *ipcSource) buffer(i int) *memory.Buffer { func (src *ipcSource) fieldMetadata(i int) *flatbuf.FieldNode { var node flatbuf.FieldNode if !src.meta.Nodes(&node, i) { - panic("field metadata out of bound") + panic("arrow/ipc: field metadata out of bound") } return &node } @@ -422,6 +436,7 @@ type arrayLoaderContext struct { ibuffer int max int memo *dictutils.Memo + version MetadataVersion } func (ctx *arrayLoaderContext) field() *flatbuf.FieldNode { @@ -450,7 +465,7 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) arrow.ArrayData { *arrow.Int8Type, *arrow.Int16Type, *arrow.Int32Type, *arrow.Int64Type, *arrow.Uint8Type, *arrow.Uint16Type, *arrow.Uint32Type, *arrow.Uint64Type, *arrow.Float16Type, *arrow.Float32Type, *arrow.Float64Type, - *arrow.Decimal128Type, + *arrow.Decimal128Type, *arrow.Decimal256Type, *arrow.Time32Type, *arrow.Time64Type, *arrow.TimestampType, *arrow.Date32Type, *arrow.Date64Type, @@ -458,7 +473,7 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) arrow.ArrayData { *arrow.DurationType: return ctx.loadPrimitive(dt) - case *arrow.BinaryType, *arrow.StringType: + case *arrow.BinaryType, *arrow.StringType, *arrow.LargeStringType, *arrow.LargeBinaryType: return ctx.loadBinary(dt) case *arrow.FixedSizeBinaryType: @@ -467,6 +482,9 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) arrow.ArrayData { case *arrow.ListType: return ctx.loadList(dt) + case *arrow.LargeListType: + return ctx.loadList(dt) + case *arrow.FixedSizeListType: return ctx.loadFixedSizeList(dt) @@ -481,21 +499,27 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) arrow.ArrayData { defer storage.Release() return array.NewData(dt, storage.Len(), storage.Buffers(), storage.Children(), storage.NullN(), storage.Offset()) + case arrow.UnionType: + return ctx.loadUnion(dt) + default: - panic(fmt.Errorf("array type %T not handled yet", dt)) + panic(fmt.Errorf("arrow/ipc: array type %T not handled yet", dt)) } } -func (ctx *arrayLoaderContext) loadCommon(nbufs int) (*flatbuf.FieldNode, []*memory.Buffer) { +func (ctx *arrayLoaderContext) loadCommon(typ arrow.Type, nbufs int) (*flatbuf.FieldNode, []*memory.Buffer) { buffers := make([]*memory.Buffer, 0, nbufs) 
field := ctx.field() var buf *memory.Buffer - switch field.NullCount() { - case 0: - ctx.ibuffer++ - default: - buf = ctx.buffer() + + if internal.HasValidityBitmap(typ, flatbuf.MetadataVersion(ctx.version)) { + switch field.NullCount() { + case 0: + ctx.ibuffer++ + default: + buf = ctx.buffer() + } } buffers = append(buffers, buf) @@ -518,7 +542,7 @@ func (ctx *arrayLoaderContext) loadNull() arrow.ArrayData { } func (ctx *arrayLoaderContext) loadPrimitive(dt arrow.DataType) arrow.ArrayData { - field, buffers := ctx.loadCommon(2) + field, buffers := ctx.loadCommon(dt.ID(), 2) switch field.Length() { case 0: @@ -534,7 +558,7 @@ func (ctx *arrayLoaderContext) loadPrimitive(dt arrow.DataType) arrow.ArrayData } func (ctx *arrayLoaderContext) loadBinary(dt arrow.DataType) arrow.ArrayData { - field, buffers := ctx.loadCommon(3) + field, buffers := ctx.loadCommon(dt.ID(), 3) buffers = append(buffers, ctx.buffer(), ctx.buffer()) defer releaseBuffers(buffers) @@ -542,7 +566,7 @@ func (ctx *arrayLoaderContext) loadBinary(dt arrow.DataType) arrow.ArrayData { } func (ctx *arrayLoaderContext) loadFixedSizeBinary(dt *arrow.FixedSizeBinaryType) arrow.ArrayData { - field, buffers := ctx.loadCommon(2) + field, buffers := ctx.loadCommon(dt.ID(), 2) buffers = append(buffers, ctx.buffer()) defer releaseBuffers(buffers) @@ -550,7 +574,7 @@ func (ctx *arrayLoaderContext) loadFixedSizeBinary(dt *arrow.FixedSizeBinaryType } func (ctx *arrayLoaderContext) loadMap(dt *arrow.MapType) arrow.ArrayData { - field, buffers := ctx.loadCommon(2) + field, buffers := ctx.loadCommon(dt.ID(), 2) buffers = append(buffers, ctx.buffer()) defer releaseBuffers(buffers) @@ -560,8 +584,13 @@ func (ctx *arrayLoaderContext) loadMap(dt *arrow.MapType) arrow.ArrayData { return array.NewData(dt, int(field.Length()), buffers, []arrow.ArrayData{sub}, int(field.NullCount()), 0) } -func (ctx *arrayLoaderContext) loadList(dt *arrow.ListType) arrow.ArrayData { - field, buffers := ctx.loadCommon(2) +type listLike interface { + arrow.DataType + Elem() arrow.DataType +} + +func (ctx *arrayLoaderContext) loadList(dt listLike) arrow.ArrayData { + field, buffers := ctx.loadCommon(dt.ID(), 2) buffers = append(buffers, ctx.buffer()) defer releaseBuffers(buffers) @@ -572,7 +601,7 @@ func (ctx *arrayLoaderContext) loadList(dt *arrow.ListType) arrow.ArrayData { } func (ctx *arrayLoaderContext) loadFixedSizeList(dt *arrow.FixedSizeListType) arrow.ArrayData { - field, buffers := ctx.loadCommon(1) + field, buffers := ctx.loadCommon(dt.ID(), 1) defer releaseBuffers(buffers) sub := ctx.loadChild(dt.Elem()) @@ -582,7 +611,7 @@ func (ctx *arrayLoaderContext) loadFixedSizeList(dt *arrow.FixedSizeListType) ar } func (ctx *arrayLoaderContext) loadStruct(dt *arrow.StructType) arrow.ArrayData { - field, buffers := ctx.loadCommon(1) + field, buffers := ctx.loadCommon(dt.ID(), 1) defer releaseBuffers(buffers) subs := make([]arrow.ArrayData, len(dt.Fields())) @@ -598,7 +627,48 @@ func (ctx *arrayLoaderContext) loadStruct(dt *arrow.StructType) arrow.ArrayData return array.NewData(dt, int(field.Length()), buffers, subs, int(field.NullCount()), 0) } -func readDictionary(memo *dictutils.Memo, meta *memory.Buffer, body ReadAtSeeker, mem memory.Allocator) (dictutils.Kind, error) { +func (ctx *arrayLoaderContext) loadUnion(dt arrow.UnionType) arrow.ArrayData { + // Sparse unions have 2 buffers (a nil validity bitmap, and the type ids) + nBuffers := 2 + // Dense unions have a third buffer, the offsets + if dt.Mode() == arrow.DenseMode { + nBuffers = 3 + } + + field, buffers := 
ctx.loadCommon(dt.ID(), nBuffers) + if field.NullCount() != 0 && buffers[0] != nil { + panic("arrow/ipc: cannot read pre-1.0.0 union array with top-level validity bitmap") + } + + switch field.Length() { + case 0: + buffers = append(buffers, memory.NewBufferBytes([]byte{})) + ctx.ibuffer++ + if dt.Mode() == arrow.DenseMode { + buffers = append(buffers, nil) + ctx.ibuffer++ + } + default: + buffers = append(buffers, ctx.buffer()) + if dt.Mode() == arrow.DenseMode { + buffers = append(buffers, ctx.buffer()) + } + } + + defer releaseBuffers(buffers) + subs := make([]arrow.ArrayData, len(dt.Fields())) + for i, f := range dt.Fields() { + subs[i] = ctx.loadChild(f.Type) + } + defer func() { + for i := range subs { + subs[i].Release() + } + }() + return array.NewData(dt, int(field.Length()), buffers, subs, 0, 0) +} + +func readDictionary(memo *dictutils.Memo, meta *memory.Buffer, body ReadAtSeeker, swapEndianness bool, mem memory.Allocator) (dictutils.Kind, error) { var ( msg = flatbuf.GetRootAsMessage(meta.Bytes(), 0) md flatbuf.DictionaryBatch @@ -635,6 +705,10 @@ func readDictionary(memo *dictutils.Memo, meta *memory.Buffer, body ReadAtSeeker dict := ctx.loadArray(valueType) defer dict.Release() + if swapEndianness { + swapEndianArrayData(dict.(*array.Data)) + } + if md.IsDelta() { memo.AddDelta(id, dict) return dictutils.KindDelta, nil diff --git a/go/arrow/ipc/file_test.go b/go/arrow/ipc/file_test.go index e188fcf992677..a8fa1be91f3f5 100644 --- a/go/arrow/ipc/file_test.go +++ b/go/arrow/ipc/file_test.go @@ -19,20 +19,15 @@ package ipc_test import ( "fmt" "io/ioutil" - "os" "testing" - "github.com/apache/arrow/go/v9/arrow/internal/arrdata" - "github.com/apache/arrow/go/v9/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/internal/arrdata" + "github.com/apache/arrow/go/v10/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v10/arrow/memory" ) func TestFile(t *testing.T) { - tempDir, err := ioutil.TempDir("", "go-arrow-file-") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(tempDir) + tempDir := t.TempDir() for name, recs := range arrdata.Records { t.Run(name, func(t *testing.T) { @@ -53,11 +48,7 @@ func TestFile(t *testing.T) { } func TestFileCompressed(t *testing.T) { - tempDir, err := ioutil.TempDir("", "go-arrow-file-") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(tempDir) + tempDir := t.TempDir() compressTypes := []flatbuf.CompressionType{ flatbuf.CompressionTypeLZ4_FRAME, flatbuf.CompressionTypeZSTD, diff --git a/go/arrow/ipc/file_writer.go b/go/arrow/ipc/file_writer.go index 38b7116ebdc29..2d91ea4c67b20 100644 --- a/go/arrow/ipc/file_writer.go +++ b/go/arrow/ipc/file_writer.go @@ -21,11 +21,11 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/internal/dictutils" - "github.com/apache/arrow/go/v9/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/internal/dictutils" + "github.com/apache/arrow/go/v10/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v10/arrow/memory" ) // PayloadWriter is an interface for injecting a different payloadwriter diff --git a/go/arrow/ipc/ipc.go b/go/arrow/ipc/ipc.go index 65207a6d6589a..71d810b1361c6 100644 --- a/go/arrow/ipc/ipc.go +++ b/go/arrow/ipc/ipc.go @@ -19,10 +19,10 @@ package ipc import ( "io" - 
"github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/arrio" - "github.com/apache/arrow/go/v9/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/arrio" + "github.com/apache/arrow/go/v10/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v10/arrow/memory" ) const ( @@ -66,14 +66,17 @@ type config struct { footer struct { offset int64 } - codec flatbuf.CompressionType - compressNP int + codec flatbuf.CompressionType + compressNP int + ensureNativeEndian bool + noAutoSchema bool } func newConfig(opts ...Option) *config { cfg := &config{ - alloc: memory.NewGoAllocator(), - codec: -1, // uncompressed + alloc: memory.NewGoAllocator(), + codec: -1, // uncompressed + ensureNativeEndian: true, } for _, opt := range opts { @@ -134,6 +137,29 @@ func WithCompressConcurrency(n int) Option { } } +// WithEnsureNativeEndian specifies whether or not to automatically byte-swap +// buffers with endian-sensitive data if the schema's endianness is not the +// platform-native endianness. This includes all numeric types, temporal types, +// decimal types, as well as the offset buffers of variable-sized binary and +// list-like types. +// +// This is only relevant to ipc Reader objects, not to writers. This defaults +// to true. +func WithEnsureNativeEndian(v bool) Option { + return func(cfg *config) { + cfg.ensureNativeEndian = v + } +} + +// WithDelayedReadSchema alters the ipc.Reader behavior to delay attempting +// to read the schema from the stream until the first call to Next instead +// of immediately attempting to read a schema from the stream when created. +func WithDelayReadSchema(v bool) Option { + return func(cfg *config) { + cfg.noAutoSchema = v + } +} + var ( _ arrio.Reader = (*Reader)(nil) _ arrio.Writer = (*Writer)(nil) diff --git a/go/arrow/ipc/ipc_test.go b/go/arrow/ipc/ipc_test.go index bcfbd8697ca8f..2f0816c0d0251 100644 --- a/go/arrow/ipc/ipc_test.go +++ b/go/arrow/ipc/ipc_test.go @@ -24,10 +24,10 @@ import ( "strconv" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/ipc" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/ipc" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/ipc/message.go b/go/arrow/ipc/message.go index f7a631d7ce89c..db9f61426aa33 100644 --- a/go/arrow/ipc/message.go +++ b/go/arrow/ipc/message.go @@ -22,9 +22,9 @@ import ( "io" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v10/arrow/memory" ) // MetadataVersion represents the Arrow metadata version. 
diff --git a/go/arrow/ipc/message_test.go b/go/arrow/ipc/message_test.go index 8f498076e9ed1..1d3b7caa59d6a 100644 --- a/go/arrow/ipc/message_test.go +++ b/go/arrow/ipc/message_test.go @@ -22,9 +22,9 @@ import ( "io" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" ) func TestMessageReaderBodyInAllocator(t *testing.T) { diff --git a/go/arrow/ipc/metadata.go b/go/arrow/ipc/metadata.go index eaf2228f3786d..0bcd24df0450c 100644 --- a/go/arrow/ipc/metadata.go +++ b/go/arrow/ipc/metadata.go @@ -18,14 +18,16 @@ package ipc import ( "encoding/binary" + "errors" "fmt" "io" "sort" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/internal/dictutils" - "github.com/apache/arrow/go/v9/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/endian" + "github.com/apache/arrow/go/v10/arrow/internal/dictutils" + "github.com/apache/arrow/go/v10/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v10/arrow/memory" flatbuffers "github.com/google/flatbuffers/go" ) @@ -284,6 +286,15 @@ func (fv *fieldVisitor) visit(field arrow.Field) { flatbuf.DecimalStart(fv.b) flatbuf.DecimalAddPrecision(fv.b, dt.Precision) flatbuf.DecimalAddScale(fv.b, dt.Scale) + flatbuf.DecimalAddBitWidth(fv.b, 128) + fv.offset = flatbuf.DecimalEnd(fv.b) + + case *arrow.Decimal256Type: + fv.dtype = flatbuf.TypeDecimal + flatbuf.DecimalStart(fv.b) + flatbuf.DecimalAddPrecision(fv.b, dt.Precision) + flatbuf.DecimalAddScale(fv.b, dt.Scale) + flatbuf.DecimalAddBitWidth(fv.b, 256) fv.offset = flatbuf.DecimalEnd(fv.b) case *arrow.FixedSizeBinaryType: @@ -297,11 +308,21 @@ func (fv *fieldVisitor) visit(field arrow.Field) { flatbuf.BinaryStart(fv.b) fv.offset = flatbuf.BinaryEnd(fv.b) + case *arrow.LargeBinaryType: + fv.dtype = flatbuf.TypeLargeBinary + flatbuf.LargeBinaryStart(fv.b) + fv.offset = flatbuf.LargeBinaryEnd(fv.b) + case *arrow.StringType: fv.dtype = flatbuf.TypeUtf8 flatbuf.Utf8Start(fv.b) fv.offset = flatbuf.Utf8End(fv.b) + case *arrow.LargeStringType: + fv.dtype = flatbuf.TypeLargeUtf8 + flatbuf.LargeUtf8Start(fv.b) + fv.offset = flatbuf.LargeUtf8End(fv.b) + case *arrow.Date32Type: fv.dtype = flatbuf.TypeDate flatbuf.DateStart(fv.b) @@ -359,6 +380,12 @@ func (fv *fieldVisitor) visit(field arrow.Field) { flatbuf.ListStart(fv.b) fv.offset = flatbuf.ListEnd(fv.b) + case *arrow.LargeListType: + fv.dtype = flatbuf.TypeLargeList + fv.kids = append(fv.kids, fieldToFB(fv.b, fv.pos.Child(0), dt.ElemField(), fv.memo)) + flatbuf.LargeListStart(fv.b) + fv.offset = flatbuf.LargeListEnd(fv.b) + case *arrow.FixedSizeListType: fv.dtype = flatbuf.TypeFixedSizeList fv.kids = append(fv.kids, fieldToFB(fv.b, fv.pos.Child(0), dt.ElemField(), fv.memo)) @@ -408,6 +435,33 @@ func (fv *fieldVisitor) visit(field arrow.Field) { field.Type = dt.ValueType fv.visit(field) + case arrow.UnionType: + fv.dtype = flatbuf.TypeUnion + offsets := make([]flatbuffers.UOffsetT, len(dt.Fields())) + for i, field := range dt.Fields() { + offsets[i] = fieldToFB(fv.b, fv.pos.Child(int32(i)), field, fv.memo) + } + + codes := dt.TypeCodes() + flatbuf.UnionStartTypeIdsVector(fv.b, len(codes)) + + for i := len(codes) - 1; i >= 0; i-- { + fv.b.PlaceInt32(int32(codes[i])) + } + fbTypeIDs := fv.b.EndVector(len(dt.TypeCodes())) + 
flatbuf.UnionStart(fv.b) + switch dt.Mode() { + case arrow.SparseMode: + flatbuf.UnionAddMode(fv.b, flatbuf.UnionModeSparse) + case arrow.DenseMode: + flatbuf.UnionAddMode(fv.b, flatbuf.UnionModeDense) + default: + panic("invalid union mode") + } + flatbuf.UnionAddTypeIds(fv.b, fbTypeIDs) + fv.offset = flatbuf.UnionEnd(fv.b) + fv.kids = append(fv.kids, offsets...) + default: err := fmt.Errorf("arrow/ipc: invalid data type %v", dt) panic(err) // FIXME(sbinet): implement all data-types. @@ -628,6 +682,12 @@ func concreteTypeFromFB(typ flatbuf.Type, data flatbuffers.Table, children []arr case flatbuf.TypeUtf8: return arrow.BinaryTypes.String, nil + case flatbuf.TypeLargeBinary: + return arrow.BinaryTypes.LargeBinary, nil + + case flatbuf.TypeLargeUtf8: + return arrow.BinaryTypes.LargeString, nil + case flatbuf.TypeBool: return arrow.FixedWidthTypes.Boolean, nil @@ -638,6 +698,13 @@ func concreteTypeFromFB(typ flatbuf.Type, data flatbuffers.Table, children []arr dt := arrow.ListOfField(children[0]) return dt, nil + case flatbuf.TypeLargeList: + if len(children) != 1 { + return nil, fmt.Errorf("arrow/ipc: LargeList must have exactly 1 child field (got=%d)", len(children)) + } + dt := arrow.LargeListOfField(children[0]) + return dt, nil + case flatbuf.TypeFixedSizeList: var dt flatbuf.FixedSizeList dt.Init(data.Bytes, data.Pos) @@ -650,6 +717,40 @@ func concreteTypeFromFB(typ flatbuf.Type, data flatbuffers.Table, children []arr case flatbuf.TypeStruct_: return arrow.StructOf(children...), nil + case flatbuf.TypeUnion: + var dt flatbuf.Union + dt.Init(data.Bytes, data.Pos) + var ( + mode arrow.UnionMode + typeIDs []arrow.UnionTypeCode + ) + + switch dt.Mode() { + case flatbuf.UnionModeSparse: + mode = arrow.SparseMode + case flatbuf.UnionModeDense: + mode = arrow.DenseMode + } + + typeIDLen := dt.TypeIdsLength() + + if typeIDLen == 0 { + for i := range children { + typeIDs = append(typeIDs, int8(i)) + } + } else { + for i := 0; i < typeIDLen; i++ { + id := dt.TypeIds(i) + code := arrow.UnionTypeCode(id) + if int32(code) != id { + return nil, errors.New("union type id out of bounds") + } + typeIDs = append(typeIDs, code) + } + } + + return arrow.UnionOf(mode, children, typeIDs), nil + case flatbuf.TypeTime: var dt flatbuf.Time dt.Init(data.Bytes, data.Pos) @@ -779,7 +880,14 @@ func floatToFB(b *flatbuffers.Builder, bw int32) flatbuffers.UOffsetT { } func decimalFromFB(data flatbuf.Decimal) (arrow.DataType, error) { - return &arrow.Decimal128Type{Precision: data.Precision(), Scale: data.Scale()}, nil + switch data.BitWidth() { + case 128: + return &arrow.Decimal128Type{Precision: data.Precision(), Scale: data.Scale()}, nil + case 256: + return &arrow.Decimal256Type{Precision: data.Precision(), Scale: data.Scale()}, nil + default: + return nil, fmt.Errorf("arrow/ipc: invalid decimal bitwidth: %d", data.BitWidth()) + } } func timeFromFB(data flatbuf.Time) (arrow.DataType, error) { @@ -922,7 +1030,7 @@ func schemaFromFB(schema *flatbuf.Schema, memo *dictutils.Memo) (*arrow.Schema, return nil, fmt.Errorf("arrow/ipc: could not convert schema metadata from flatbuf: %w", err) } - return arrow.NewSchema(fields, &md), nil + return arrow.NewSchemaWithEndian(fields, &md, endian.Endianness(schema.Endianness())), nil } func schemaToFB(b *flatbuffers.Builder, schema *arrow.Schema, memo *dictutils.Mapper) flatbuffers.UOffsetT { @@ -941,7 +1049,7 @@ func schemaToFB(b *flatbuffers.Builder, schema *arrow.Schema, memo *dictutils.Ma metaFB := metadataToFB(b, schema.Metadata(), flatbuf.SchemaStartCustomMetadataVector) 
flatbuf.SchemaStart(b) - flatbuf.SchemaAddEndianness(b, flatbuf.EndiannessLittle) + flatbuf.SchemaAddEndianness(b, flatbuf.Endianness(schema.Endianness())) flatbuf.SchemaAddFields(b, fieldsFB) flatbuf.SchemaAddCustomMetadata(b, metaFB) offset := flatbuf.SchemaEnd(b) diff --git a/go/arrow/ipc/metadata_test.go b/go/arrow/ipc/metadata_test.go index 9c39a936fd5fe..951db3d989f30 100644 --- a/go/arrow/ipc/metadata_test.go +++ b/go/arrow/ipc/metadata_test.go @@ -21,12 +21,12 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/internal/dictutils" - "github.com/apache/arrow/go/v9/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v9/arrow/internal/testing/types" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/internal/dictutils" + "github.com/apache/arrow/go/v10/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v10/arrow/internal/testing/types" + "github.com/apache/arrow/go/v10/arrow/memory" flatbuffers "github.com/google/flatbuffers/go" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/ipc/reader.go b/go/arrow/ipc/reader.go index 69f1097eac154..0af22bd95ca8f 100644 --- a/go/arrow/ipc/reader.go +++ b/go/arrow/ipc/reader.go @@ -23,12 +23,13 @@ import ( "io" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/internal/dictutils" - "github.com/apache/arrow/go/v9/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/endian" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/internal/dictutils" + "github.com/apache/arrow/go/v10/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v10/arrow/memory" ) // Reader reads records from an io.Reader. @@ -43,12 +44,14 @@ type Reader struct { err error // types dictTypeMap - memo dictutils.Memo - readInitialDicts bool + memo dictutils.Memo + readInitialDicts bool + done bool + swapEndianness bool + ensureNativeEndian bool + expectedSchema *arrow.Schema mem memory.Allocator - - done bool } // NewReaderFromMessageReader allows constructing a new reader object with the @@ -69,13 +72,16 @@ func NewReaderFromMessageReader(r MessageReader, opts ...Option) (reader *Reader r: r, refCount: 1, // types: make(dictTypeMap), - memo: dictutils.NewMemo(), - mem: cfg.alloc, + memo: dictutils.NewMemo(), + mem: cfg.alloc, + ensureNativeEndian: cfg.ensureNativeEndian, + expectedSchema: cfg.schema, } - err = rr.readSchema(cfg.schema) - if err != nil { - return nil, fmt.Errorf("arrow/ipc: could not read schema from stream: %w", err) + if !cfg.noAutoSchema { + if err := rr.readSchema(cfg.schema); err != nil { + return nil, err + } } return rr, nil @@ -90,7 +96,15 @@ func NewReader(r io.Reader, opts ...Option) (*Reader, error) { // underlying stream. 
func (r *Reader) Err() error { return r.err } -func (r *Reader) Schema() *arrow.Schema { return r.schema } +func (r *Reader) Schema() *arrow.Schema { + if r.schema == nil { + if err := r.readSchema(r.expectedSchema); err != nil { + r.err = fmt.Errorf("arrow/ipc: could not read schema from stream: %w", err) + r.done = true + } + } + return r.schema +} func (r *Reader) readSchema(schema *arrow.Schema) error { msg, err := r.r.Message() @@ -116,6 +130,11 @@ func (r *Reader) readSchema(schema *arrow.Schema) error { return errInconsistentSchema } + if r.ensureNativeEndian && !r.schema.IsNativeEndian() { + r.swapEndianness = true + r.schema = r.schema.WithEndianness(endian.NativeEndian) + } + return nil } @@ -180,7 +199,7 @@ func (r *Reader) getInitialDicts() bool { if msg.Type() != MessageDictionaryBatch { r.err = fmt.Errorf("arrow/ipc: IPC stream did not have the expected (%d) dictionaries at the start of the stream", numDicts) } - if _, err := readDictionary(&r.memo, msg.meta, bytes.NewReader(msg.body.Bytes()), r.mem); err != nil { + if _, err := readDictionary(&r.memo, msg.meta, bytes.NewReader(msg.body.Bytes()), r.swapEndianness, r.mem); err != nil { r.done = true r.err = err return false @@ -196,6 +215,13 @@ func (r *Reader) next() bool { r.err = fmt.Errorf("arrow/ipc: unknown error while reading: %v", pErr) } }() + if r.schema == nil { + if err := r.readSchema(r.expectedSchema); err != nil { + r.err = fmt.Errorf("arrow/ipc: could not read schema from stream: %w", err) + r.done = true + return false + } + } if !r.readInitialDicts && !r.getInitialDicts() { return false @@ -205,7 +231,7 @@ func (r *Reader) next() bool { msg, r.err = r.r.Message() for msg != nil && msg.Type() == MessageDictionaryBatch { - if _, r.err = readDictionary(&r.memo, msg.meta, bytes.NewReader(msg.body.Bytes()), r.mem); r.err != nil { + if _, r.err = readDictionary(&r.memo, msg.meta, bytes.NewReader(msg.body.Bytes()), r.swapEndianness, r.mem); r.err != nil { r.done = true return false } @@ -224,7 +250,7 @@ func (r *Reader) next() bool { return false } - r.rec = newRecord(r.schema, &r.memo, msg.meta, bytes.NewReader(msg.body.Bytes()), r.mem) + r.rec = newRecord(r.schema, &r.memo, msg.meta, bytes.NewReader(msg.body.Bytes()), r.swapEndianness, r.mem) return true } diff --git a/go/arrow/ipc/reader_test.go b/go/arrow/ipc/reader_test.go index 76ef0c139ad08..503b788dcca4d 100644 --- a/go/arrow/ipc/reader_test.go +++ b/go/arrow/ipc/reader_test.go @@ -20,9 +20,9 @@ import ( "bytes" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/ipc/stream_test.go b/go/arrow/ipc/stream_test.go index 83b401b517e24..3fb86c2afc55a 100644 --- a/go/arrow/ipc/stream_test.go +++ b/go/arrow/ipc/stream_test.go @@ -19,21 +19,16 @@ package ipc_test import ( "io" "io/ioutil" - "os" "strconv" "testing" - "github.com/apache/arrow/go/v9/arrow/internal/arrdata" - "github.com/apache/arrow/go/v9/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/internal/arrdata" + "github.com/apache/arrow/go/v10/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v10/arrow/memory" ) func TestStream(t *testing.T) { - tempDir, err := ioutil.TempDir("", "go-arrow-stream-") - if err != nil { - 
t.Fatal(err) - } - defer os.RemoveAll(tempDir) + tempDir := t.TempDir() for name, recs := range arrdata.Records { t.Run(name, func(t *testing.T) { @@ -64,11 +59,7 @@ func TestStream(t *testing.T) { } func TestStreamCompressed(t *testing.T) { - tempDir, err := ioutil.TempDir("", "go-arrow-stream-") - if err != nil { - t.Fatal(err) - } - defer os.RemoveAll(tempDir) + tempDir := t.TempDir() compressTypes := []flatbuf.CompressionType{ flatbuf.CompressionTypeLZ4_FRAME, flatbuf.CompressionTypeZSTD, diff --git a/go/arrow/ipc/writer.go b/go/arrow/ipc/writer.go index 7a288af258ed9..9af88d9c2d78d 100644 --- a/go/arrow/ipc/writer.go +++ b/go/arrow/ipc/writer.go @@ -25,13 +25,16 @@ import ( "io" "math" "sync" - - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/internal/dictutils" - "github.com/apache/arrow/go/v9/arrow/internal/flatbuf" - "github.com/apache/arrow/go/v9/arrow/memory" + "unsafe" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/internal" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/internal/dictutils" + "github.com/apache/arrow/go/v10/arrow/internal/flatbuf" + "github.com/apache/arrow/go/v10/arrow/memory" ) type swriter struct { @@ -477,23 +480,25 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error { return nil } - switch arr.NullN() { - case 0: - // there are no null values, drop the null bitmap - p.body = append(p.body, nil) - default: - data := arr.Data() - var bitmap *memory.Buffer - if data.NullN() == data.Len() { - // every value is null, just use a new unset bitmap to avoid the expense of copying - bitmap = memory.NewResizableBuffer(w.mem) - minLength := paddedLength(bitutil.BytesForBits(int64(data.Len())), kArrowAlignment) - bitmap.Resize(int(minLength)) - } else { - // otherwise truncate and copy the bits - bitmap = newTruncatedBitmap(w.mem, int64(data.Offset()), int64(data.Len()), data.Buffers()[0]) + if internal.HasValidityBitmap(arr.DataType().ID(), flatbuf.MetadataVersion(currentMetadataVersion)) { + switch arr.NullN() { + case 0: + // there are no null values, drop the null bitmap + p.body = append(p.body, nil) + default: + data := arr.Data() + var bitmap *memory.Buffer + if data.NullN() == data.Len() { + // every value is null, just use a new zero-initialized bitmap to avoid the expense of copying + bitmap = memory.NewResizableBuffer(w.mem) + minLength := paddedLength(bitutil.BytesForBits(int64(data.Len())), kArrowAlignment) + bitmap.Resize(int(minLength)) + } else { + // otherwise truncate and copy the bits + bitmap = newTruncatedBitmap(w.mem, int64(data.Offset()), int64(data.Len()), data.Buffers()[0]) + } + p.body = append(p.body, bitmap) } - p.body = append(p.body, bitmap) } switch dtype := arr.DataType().(type) { @@ -532,38 +537,8 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error { } p.body = append(p.body, values) - case *arrow.BinaryType: - arr := arr.(*array.Binary) - voffsets, err := w.getZeroBasedValueOffsets(arr) - if err != nil { - return fmt.Errorf("could not retrieve zero-based value offsets from %T: %w", arr, err) - } - data := arr.Data() - values := data.Buffers()[2] - - var totalDataBytes int64 - if voffsets != nil { - totalDataBytes = int64(len(arr.ValueBytes())) - } - - switch { - case needTruncate(int64(data.Offset()), 
values, totalDataBytes): - // slice data buffer to include the range we need now. - var ( - beg = int64(arr.ValueOffset(0)) - len = minI64(paddedLength(totalDataBytes, kArrowAlignment), int64(totalDataBytes)) - ) - values = memory.NewBufferBytes(data.Buffers()[2].Bytes()[beg : beg+len]) - default: - if values != nil { - values.Retain() - } - } - p.body = append(p.body, voffsets) - p.body = append(p.body, values) - - case *arrow.StringType: - arr := arr.(*array.String) + case *arrow.BinaryType, *arrow.LargeBinaryType, *arrow.StringType, *arrow.LargeStringType: + arr := arr.(array.BinaryLike) voffsets, err := w.getZeroBasedValueOffsets(arr) if err != nil { return fmt.Errorf("could not retrieve zero-based value offsets from %T: %w", arr, err) @@ -580,7 +555,7 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error { case needTruncate(int64(data.Offset()), values, totalDataBytes): // slice data buffer to include the range we need now. var ( - beg = int64(arr.ValueOffset(0)) + beg = arr.ValueOffset64(0) len = minI64(paddedLength(totalDataBytes, kArrowAlignment), int64(totalDataBytes)) ) values = memory.NewBufferBytes(data.Buffers()[2].Bytes()[beg : beg+len]) @@ -603,6 +578,75 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error { } w.depth++ + case *arrow.SparseUnionType: + offset, length := arr.Data().Offset(), arr.Len() + arr := arr.(*array.SparseUnion) + typeCodes := getTruncatedBuffer(int64(offset), int64(length), int32(unsafe.Sizeof(arrow.UnionTypeCode(0))), arr.TypeCodes()) + p.body = append(p.body, typeCodes) + + w.depth-- + for i := 0; i < arr.NumFields(); i++ { + err := w.visit(p, arr.Field(i)) + if err != nil { + return fmt.Errorf("could not visit field %d of sparse union array: %w", i, err) + } + } + w.depth++ + case *arrow.DenseUnionType: + offset, length := arr.Data().Offset(), arr.Len() + arr := arr.(*array.DenseUnion) + typeCodes := getTruncatedBuffer(int64(offset), int64(length), int32(unsafe.Sizeof(arrow.UnionTypeCode(0))), arr.TypeCodes()) + p.body = append(p.body, typeCodes) + + w.depth-- + dt := arr.UnionType() + + // union type codes are not necessarily 0-indexed + maxCode := dt.MaxTypeCode() + + // allocate an array of child offsets. Set all to -1 to indicate we + // haven't observed a first occurrence of a particular child yet + offsets := make([]int32, maxCode+1) + lengths := make([]int32, maxCode+1) + offsets[0], lengths[0] = -1, 0 + for i := 1; i < len(offsets); i *= 2 { + copy(offsets[i:], offsets[:i]) + copy(lengths[i:], lengths[:i]) + } + + var valueOffsets *memory.Buffer + if offset != 0 { + valueOffsets = w.rebaseDenseUnionValueOffsets(arr, offsets, lengths) + } else { + valueOffsets = getTruncatedBuffer(int64(offset), int64(length), int32(arrow.Int32SizeBytes), arr.ValueOffsets()) + } + p.body = append(p.body, valueOffsets) + + // visit children and slice accordingly + for i := range dt.Fields() { + child := arr.Field(i) + // for sliced unions it's tricky to know how much to truncate + // the children. For now we'll truncate the children to be + // no longer than the parent union. 
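The offsets and lengths arrays prepared in the dense-union branch above feed rebaseDenseUnionValueOffsets further down: for a sliced dense union, each child's value offsets are re-zeroed at that child's first occurrence, and the largest rebased offset per child is tracked so the children can be truncated accordingly. A stdlib-only sketch of that rebasing, under assumed toy inputs:

```go
// Sketch of the per-child offset rebasing used when writing a sliced
// dense union: the first offset seen for a child becomes that child's 0.
package main

import "fmt"

func rebase(codes []int8, offsets []int32, maxCode int8) (shifted, firstOff, lengths []int32) {
	shifted = make([]int32, len(offsets))
	firstOff = make([]int32, maxCode+1)
	lengths = make([]int32, maxCode+1)
	for i := range firstOff {
		firstOff[i] = -1 // -1 marks "child not seen yet"
	}
	for i, c := range codes {
		if firstOff[c] == -1 {
			firstOff[c] = offsets[i] // offsets are increasing per child, per spec
			shifted[i] = 0
		} else {
			shifted[i] = offsets[i] - firstOff[c]
		}
		if shifted[i]+1 > lengths[c] {
			lengths[c] = shifted[i] + 1
		}
	}
	return shifted, firstOff, lengths
}

func main() {
	// A slice of a dense union with children 0 and 1 whose original
	// offsets no longer start at zero.
	codes := []int8{0, 1, 0, 1}
	offsets := []int32{3, 7, 4, 8}
	shifted, first, lengths := rebase(codes, offsets, 1)
	fmt.Println(shifted, first, lengths) // [0 0 1 1] [3 7] [2 2]
}
```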
+ + if offset != 0 { + code := dt.TypeCodes()[i] + childOffset := offsets[code] + childLen := lengths[code] + + if childOffset > 0 { + child = array.NewSlice(child, int64(childOffset), int64(childOffset+childLen)) + defer child.Release() + } else if childLen < int32(child.Len()) { + child = array.NewSlice(child, 0, int64(childLen)) + defer child.Release() + } + } + if err := w.visit(p, child); err != nil { + return fmt.Errorf("could not visit field %d of dense union array: %w", i, err) + } + } + w.depth++ case *arrow.MapType: arr := arr.(*array.Map) voffsets, err := w.getZeroBasedValueOffsets(arr) @@ -640,8 +684,8 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error { return fmt.Errorf("could not visit list element for array %T: %w", arr, err) } w.depth++ - case *arrow.ListType: - arr := arr.(*array.List) + case *arrow.ListType, *arrow.LargeListType: + arr := arr.(array.ListLike) voffsets, err := w.getZeroBasedValueOffsets(arr) if err != nil { return fmt.Errorf("could not retrieve zero-based value offsets for array %T: %w", arr, err) @@ -661,12 +705,13 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error { } }() - if voffsets != nil { - values_offset = int64(arr.Offsets()[0]) - values_length = int64(arr.Offsets()[arr.Len()]) - values_offset + if arr.Len() > 0 && voffsets != nil { + values_offset, _ = arr.ValueOffsets(0) + _, values_length = arr.ValueOffsets(arr.Len() - 1) + values_length -= values_offset } - if len(arr.Offsets()) != 0 || values_length < int64(values.Len()) { + if arr.Len() != 0 || values_length < int64(values.Len()) { // must also slice the values values = array.NewSlice(values, values_offset, values_length) mustRelease = true @@ -707,7 +752,8 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error { func (w *recordEncoder) getZeroBasedValueOffsets(arr arrow.Array) (*memory.Buffer, error) { data := arr.Data() voffsets := data.Buffers()[1] - offsetBytesNeeded := arrow.Int32Traits.BytesRequired(data.Len() + 1) + offsetTraits := arr.DataType().(arrow.OffsetsDataType).OffsetTypeTraits() + offsetBytesNeeded := offsetTraits.BytesRequired(data.Len() + 1) if data.Offset() != 0 || offsetBytesNeeded < voffsets.Len() { // if we have a non-zero offset, then the value offsets do not start at @@ -719,13 +765,27 @@ func (w *recordEncoder) getZeroBasedValueOffsets(arr arrow.Array) (*memory.Buffe shiftedOffsets := memory.NewResizableBuffer(w.mem) shiftedOffsets.Resize(offsetBytesNeeded) - dest := arrow.Int32Traits.CastFromBytes(shiftedOffsets.Bytes()) - offsets := arrow.Int32Traits.CastFromBytes(voffsets.Bytes())[data.Offset() : data.Offset()+data.Len()+1] + switch arr.DataType().Layout().Buffers[1].ByteWidth { + case 8: + dest := arrow.Int64Traits.CastFromBytes(shiftedOffsets.Bytes()) + offsets := arrow.Int64Traits.CastFromBytes(voffsets.Bytes())[data.Offset() : data.Offset()+data.Len()+1] + + startOffset := offsets[0] + for i, o := range offsets { + dest[i] = o - startOffset + } + + default: + debug.Assert(arr.DataType().Layout().Buffers[1].ByteWidth == 4, "invalid offset bytewidth") + dest := arrow.Int32Traits.CastFromBytes(shiftedOffsets.Bytes()) + offsets := arrow.Int32Traits.CastFromBytes(voffsets.Bytes())[data.Offset() : data.Offset()+data.Len()+1] - startOffset := offsets[0] - for i, o := range offsets { - dest[i] = o - startOffset + startOffset := offsets[0] + for i, o := range offsets { + dest[i] = o - startOffset + } } + voffsets = shiftedOffsets } else { voffsets.Retain() @@ -737,6 +797,33 @@ func (w *recordEncoder) 
getZeroBasedValueOffsets(arr arrow.Array) (*memory.Buffe return voffsets, nil } +func (w *recordEncoder) rebaseDenseUnionValueOffsets(arr *array.DenseUnion, offsets, lengths []int32) *memory.Buffer { + // this case sucks. Because the offsets are different for each + // child array, when we have a sliced array, we need to re-base + // the value offsets for each array! ew. + unshiftedOffsets := arr.RawValueOffsets() + codes := arr.RawTypeCodes() + + shiftedOffsetsBuf := memory.NewResizableBuffer(w.mem) + shiftedOffsetsBuf.Resize(arrow.Int32Traits.BytesRequired(arr.Len())) + shiftedOffsets := arrow.Int32Traits.CastFromBytes(shiftedOffsetsBuf.Bytes()) + + // compute shifted offsets by subtracting child offset + for i, c := range codes { + if offsets[c] == -1 { + // offsets are guaranteed to be increasing according to the spec + // so the first offset we find for a child is the initial offset + // and will become the "0" for this child. + offsets[c] = unshiftedOffsets[i] + shiftedOffsets[i] = 0 + } else { + shiftedOffsets[i] = unshiftedOffsets[i] - offsets[c] + } + lengths[c] = maxI32(lengths[c], shiftedOffsets[i]+1) + } + return shiftedOffsetsBuf +} + func (w *recordEncoder) Encode(p *Payload, rec arrow.Record) error { if err := w.encode(p, rec); err != nil { return err @@ -768,6 +855,19 @@ func newTruncatedBitmap(mem memory.Allocator, offset, length int64, input *memor } } +func getTruncatedBuffer(offset, length int64, byteWidth int32, buf *memory.Buffer) *memory.Buffer { + if buf == nil { + return buf + } + + paddedLen := paddedLength(length*int64(byteWidth), kArrowAlignment) + if offset != 0 || paddedLen < int64(buf.Len()) { + return memory.SliceBuffer(buf, int(offset*int64(byteWidth)), int(minI64(paddedLen, int64(buf.Len())))) + } + buf.Retain() + return buf +} + func needTruncate(offset int64, buf *memory.Buffer, minLength int64) bool { if buf == nil { return false @@ -781,3 +881,10 @@ func minI64(a, b int64) int64 { } return b } + +func maxI32(a, b int32) int32 { + if a > b { + return a + } + return b +} diff --git a/go/arrow/ipc/writer_test.go b/go/arrow/ipc/writer_test.go index ae15e838b634c..c9e661f144e3e 100644 --- a/go/arrow/ipc/writer_test.go +++ b/go/arrow/ipc/writer_test.go @@ -21,10 +21,10 @@ import ( "fmt" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/arrow/math/Makefile b/go/arrow/math/Makefile index 15255950d71cb..3cc392071cde1 100644 --- a/go/arrow/math/Makefile +++ b/go/arrow/math/Makefile @@ -50,15 +50,15 @@ INTEL_SOURCES := \ assembly: $(INTEL_SOURCES) generate: ../bin/tmpl - ../bin/tmpl -i -data=float64.tmpldata type.go.tmpl=float64.go type_amd64.go.tmpl=float64_amd64.go type_arm64.go.tmpl=float64_arm64.go type_s390x.go.tmpl=float64_s390x.go type_noasm.go.tmpl=float64_noasm.go type_test.go.tmpl=float64_test.go + ../bin/tmpl -i -data=float64.tmpldata type.go.tmpl=float64.go type_amd64.go.tmpl=float64_amd64.go type_arm64.go.tmpl=float64_arm64.go type_ppc64le.go.tmpl=float64_ppc64le.go type_s390x.go.tmpl=float64_s390x.go type_noasm.go.tmpl=float64_noasm.go type_test.go.tmpl=float64_test.go ../bin/tmpl -i -data=float64.tmpldata -d arch=avx2 
type_simd_amd64.go.tmpl=float64_avx2_amd64.go ../bin/tmpl -i -data=float64.tmpldata -d arch=sse4 type_simd_amd64.go.tmpl=float64_sse4_amd64.go ../bin/tmpl -i -data=float64.tmpldata -d arch=neon type_simd_arm64.go.tmpl=float64_neon_arm64.go - ../bin/tmpl -i -data=int64.tmpldata type.go.tmpl=int64.go type_amd64.go.tmpl=int64_amd64.go type_arm64.go.tmpl=int64_arm64.go type_s390x.go.tmpl=int64_s390x.go type_noasm.go.tmpl=int64_noasm.go type_test.go.tmpl=int64_test.go + ../bin/tmpl -i -data=int64.tmpldata type.go.tmpl=int64.go type_amd64.go.tmpl=int64_amd64.go type_arm64.go.tmpl=int64_arm64.go type_ppc64le.go.tmpl=int64_ppc64le.go type_s390x.go.tmpl=int64_s390x.go type_noasm.go.tmpl=int64_noasm.go type_test.go.tmpl=int64_test.go ../bin/tmpl -i -data=int64.tmpldata -d arch=avx2 type_simd_amd64.go.tmpl=int64_avx2_amd64.go ../bin/tmpl -i -data=int64.tmpldata -d arch=sse4 type_simd_amd64.go.tmpl=int64_sse4_amd64.go ../bin/tmpl -i -data=int64.tmpldata -d arch=neon type_simd_arm64.go.tmpl=int64_neon_arm64.go - ../bin/tmpl -i -data=uint64.tmpldata type.go.tmpl=uint64.go type_amd64.go.tmpl=uint64_amd64.go type_arm64.go.tmpl=uint64_arm64.go type_s390x.go.tmpl=uint64_s390x.go type_noasm.go.tmpl=uint64_noasm.go type_test.go.tmpl=uint64_test.go + ../bin/tmpl -i -data=uint64.tmpldata type.go.tmpl=uint64.go type_amd64.go.tmpl=uint64_amd64.go type_arm64.go.tmpl=uint64_arm64.go type_ppc64le.go.tmpl=uint64_ppc64le.go type_s390x.go.tmpl=uint64_s390x.go type_noasm.go.tmpl=uint64_noasm.go type_test.go.tmpl=uint64_test.go ../bin/tmpl -i -data=uint64.tmpldata -d arch=avx2 type_simd_amd64.go.tmpl=uint64_avx2_amd64.go ../bin/tmpl -i -data=uint64.tmpldata -d arch=sse4 type_simd_amd64.go.tmpl=uint64_sse4_amd64.go ../bin/tmpl -i -data=uint64.tmpldata -d arch=neon type_simd_arm64.go.tmpl=uint64_neon_arm64.go diff --git a/go/arrow/math/float64.go b/go/arrow/math/float64.go index c7f6072ebadcb..6119998968284 100644 --- a/go/arrow/math/float64.go +++ b/go/arrow/math/float64.go @@ -19,7 +19,7 @@ package math import ( - "github.com/apache/arrow/go/v9/arrow/array" + "github.com/apache/arrow/go/v10/arrow/array" ) type Float64Funcs struct { diff --git a/go/arrow/math/float64_amd64.go b/go/arrow/math/float64_amd64.go index 87227d5fa822f..ef40cf613a3c9 100644 --- a/go/arrow/math/float64_amd64.go +++ b/go/arrow/math/float64_amd64.go @@ -16,6 +16,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package math diff --git a/go/arrow/math/float64_avx2_amd64.go b/go/arrow/math/float64_avx2_amd64.go index 91eef40b45976..fd896abc79aab 100644 --- a/go/arrow/math/float64_avx2_amd64.go +++ b/go/arrow/math/float64_avx2_amd64.go @@ -16,6 +16,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
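A quick note on the build-constraint pairs touched throughout these math files: Go 1.17 introduced the //go:build syntax, and the patch adds it above the pre-existing // +build lines so that both old and new toolchains resolve the same constraint. A minimal header sketch of the pattern, assuming the noasm tag is the one used to force the pure-Go code paths (as elsewhere in this package):

//go:build !noasm
// +build !noasm

package math

// With the noasm tag set, files guarded like this are excluded from the
// build and the pure-Go implementations are compiled instead.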
+//go:build !noasm // +build !noasm package math @@ -23,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v9/arrow/array" + "github.com/apache/arrow/go/v10/arrow/array" ) //go:noescape diff --git a/go/arrow/math/float64_neon_arm64.go b/go/arrow/math/float64_neon_arm64.go index 7e2bda9762497..676c2ca439fc3 100755 --- a/go/arrow/math/float64_neon_arm64.go +++ b/go/arrow/math/float64_neon_arm64.go @@ -23,7 +23,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v9/arrow/array" + "github.com/apache/arrow/go/v10/arrow/array" ) //go:noescape diff --git a/go/arrow/math/float64_ppc64le.go b/go/arrow/math/float64_ppc64le.go new file mode 100644 index 0000000000000..f60be90721d3a --- /dev/null +++ b/go/arrow/math/float64_ppc64le.go @@ -0,0 +1,25 @@ +// Code generated by type_s390x.go.tmpl. DO NOT EDIT. + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !noasm + +package math + +func initFloat64Go() { + Float64.sum = sum_float64_go +} diff --git a/go/arrow/math/float64_sse4_amd64.go b/go/arrow/math/float64_sse4_amd64.go index 449cb222252b7..d388cce72769a 100644 --- a/go/arrow/math/float64_sse4_amd64.go +++ b/go/arrow/math/float64_sse4_amd64.go @@ -16,6 +16,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package math @@ -23,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v9/arrow/array" + "github.com/apache/arrow/go/v10/arrow/array" ) //go:noescape diff --git a/go/arrow/math/float64_test.go b/go/arrow/math/float64_test.go index ee8f7d6523bc7..906ecfad4242d 100644 --- a/go/arrow/math/float64_test.go +++ b/go/arrow/math/float64_test.go @@ -21,9 +21,9 @@ package math_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/math" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/math" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/math/int64.go b/go/arrow/math/int64.go index 479aaf4d13381..8c2edf0f5a00a 100644 --- a/go/arrow/math/int64.go +++ b/go/arrow/math/int64.go @@ -19,7 +19,7 @@ package math import ( - "github.com/apache/arrow/go/v9/arrow/array" + "github.com/apache/arrow/go/v10/arrow/array" ) type Int64Funcs struct { diff --git a/go/arrow/math/int64_amd64.go b/go/arrow/math/int64_amd64.go index 2703bebd00a99..ed616e84264e8 100644 --- a/go/arrow/math/int64_amd64.go +++ b/go/arrow/math/int64_amd64.go @@ -16,6 +16,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build !noasm // +build !noasm package math diff --git a/go/arrow/math/int64_avx2_amd64.go b/go/arrow/math/int64_avx2_amd64.go index c28a6ad359c4a..60e4c98ef2a89 100644 --- a/go/arrow/math/int64_avx2_amd64.go +++ b/go/arrow/math/int64_avx2_amd64.go @@ -16,6 +16,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package math @@ -23,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v9/arrow/array" + "github.com/apache/arrow/go/v10/arrow/array" ) //go:noescape diff --git a/go/arrow/math/int64_neon_arm64.go b/go/arrow/math/int64_neon_arm64.go index a9b41f13c4326..a8edaaa64df89 100755 --- a/go/arrow/math/int64_neon_arm64.go +++ b/go/arrow/math/int64_neon_arm64.go @@ -23,7 +23,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v9/arrow/array" + "github.com/apache/arrow/go/v10/arrow/array" ) //go:noescape diff --git a/go/arrow/math/int64_ppc64le.go b/go/arrow/math/int64_ppc64le.go new file mode 100644 index 0000000000000..1a615a9b27dcf --- /dev/null +++ b/go/arrow/math/int64_ppc64le.go @@ -0,0 +1,25 @@ +// Code generated by type_s390x.go.tmpl. DO NOT EDIT. + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !noasm + +package math + +func initInt64Go() { + Int64.sum = sum_int64_go +} diff --git a/go/arrow/math/int64_sse4_amd64.go b/go/arrow/math/int64_sse4_amd64.go index 7165ec90acceb..75f94f56b739a 100644 --- a/go/arrow/math/int64_sse4_amd64.go +++ b/go/arrow/math/int64_sse4_amd64.go @@ -16,6 +16,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package math @@ -23,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v9/arrow/array" + "github.com/apache/arrow/go/v10/arrow/array" ) //go:noescape diff --git a/go/arrow/math/int64_test.go b/go/arrow/math/int64_test.go index d10ebf9de60ec..b491a8893c420 100644 --- a/go/arrow/math/int64_test.go +++ b/go/arrow/math/int64_test.go @@ -21,9 +21,9 @@ package math_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/math" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/math" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/math/math_ppc64le.go b/go/arrow/math/math_ppc64le.go new file mode 100644 index 0000000000000..3daeac7efaff8 --- /dev/null +++ b/go/arrow/math/math_ppc64le.go @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !noasm + +package math + +func init() { + initGo() +} + +func initGo() { + initFloat64Go() + initInt64Go() + initUint64Go() +} diff --git a/go/arrow/math/type.go.tmpl b/go/arrow/math/type.go.tmpl index 4503722a307c8..22c0701663d26 100644 --- a/go/arrow/math/type.go.tmpl +++ b/go/arrow/math/type.go.tmpl @@ -17,7 +17,7 @@ package math import ( - "github.com/apache/arrow/go/v9/arrow/array" + "github.com/apache/arrow/go/v10/arrow/array" ) {{$def := .D}} diff --git a/go/arrow/math/type_ppc64le.go.tmpl b/go/arrow/math/type_ppc64le.go.tmpl new file mode 100644 index 0000000000000..49a7fc634fd67 --- /dev/null +++ b/go/arrow/math/type_ppc64le.go.tmpl @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
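The new ppc64le files in this directory (and the template below) follow the same dispatch pattern as the other architectures: a build-tagged file installs an implementation into the package-level function structs at init time. Here is a self-contained sketch of that pattern with illustrative names only (sumFloat64Go stands in for the package's pure-Go sum and is not the actual identifier):

package main

import "fmt"

type float64Funcs struct {
	sum func([]float64) float64 // selected implementation, installed at init
}

var float64Impl float64Funcs

// sumFloat64Go is the straightforward scalar-loop fallback.
func sumFloat64Go(v []float64) float64 {
	var acc float64
	for _, x := range v {
		acc += x
	}
	return acc
}

func init() {
	// a ppc64le (or noasm) build wires in the pure-Go version; amd64/arm64
	// builds would install a SIMD-backed function here instead.
	float64Impl.sum = sumFloat64Go
}

func main() {
	fmt.Println(float64Impl.sum([]float64{1, 2, 3})) // 6
}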
+ +// +build !noasm + +package math + +{{with .In}} +func init{{.Name}}Go() { + {{.Name}}.sum = sum_{{.Type}}_go +} +{{end}} diff --git a/go/arrow/math/type_simd_amd64.go.tmpl b/go/arrow/math/type_simd_amd64.go.tmpl index 05e24e29c49d7..2a2954ac3b42a 100644 --- a/go/arrow/math/type_simd_amd64.go.tmpl +++ b/go/arrow/math/type_simd_amd64.go.tmpl @@ -21,7 +21,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v9/arrow/array" + "github.com/apache/arrow/go/v10/arrow/array" ) {{$name := printf "%s_%s" .In.Type .D.arch}} diff --git a/go/arrow/math/type_simd_arm64.go.tmpl b/go/arrow/math/type_simd_arm64.go.tmpl index 05e24e29c49d7..2a2954ac3b42a 100755 --- a/go/arrow/math/type_simd_arm64.go.tmpl +++ b/go/arrow/math/type_simd_arm64.go.tmpl @@ -21,7 +21,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v9/arrow/array" + "github.com/apache/arrow/go/v10/arrow/array" ) {{$name := printf "%s_%s" .In.Type .D.arch}} diff --git a/go/arrow/math/type_test.go.tmpl b/go/arrow/math/type_test.go.tmpl index 26cd3b9a27873..15f44b3848ec3 100644 --- a/go/arrow/math/type_test.go.tmpl +++ b/go/arrow/math/type_test.go.tmpl @@ -19,9 +19,9 @@ package math_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/math" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/math" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/math/uint64.go b/go/arrow/math/uint64.go index e54d2c88671ad..ae24e61cf98b9 100644 --- a/go/arrow/math/uint64.go +++ b/go/arrow/math/uint64.go @@ -19,7 +19,7 @@ package math import ( - "github.com/apache/arrow/go/v9/arrow/array" + "github.com/apache/arrow/go/v10/arrow/array" ) type Uint64Funcs struct { diff --git a/go/arrow/math/uint64_amd64.go b/go/arrow/math/uint64_amd64.go index c6bc29c4e3e32..726d3425614f8 100644 --- a/go/arrow/math/uint64_amd64.go +++ b/go/arrow/math/uint64_amd64.go @@ -16,6 +16,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package math diff --git a/go/arrow/math/uint64_avx2_amd64.go b/go/arrow/math/uint64_avx2_amd64.go index 0d73bcc0ab13f..7f953d8b8ef3a 100644 --- a/go/arrow/math/uint64_avx2_amd64.go +++ b/go/arrow/math/uint64_avx2_amd64.go @@ -16,6 +16,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package math @@ -23,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v9/arrow/array" + "github.com/apache/arrow/go/v10/arrow/array" ) //go:noescape diff --git a/go/arrow/math/uint64_neon_arm64.go b/go/arrow/math/uint64_neon_arm64.go index 7252281b674de..ea500b4f299f5 100755 --- a/go/arrow/math/uint64_neon_arm64.go +++ b/go/arrow/math/uint64_neon_arm64.go @@ -23,7 +23,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v9/arrow/array" + "github.com/apache/arrow/go/v10/arrow/array" ) //go:noescape diff --git a/go/arrow/math/uint64_ppc64le.go b/go/arrow/math/uint64_ppc64le.go new file mode 100644 index 0000000000000..8f7419fd4846a --- /dev/null +++ b/go/arrow/math/uint64_ppc64le.go @@ -0,0 +1,25 @@ +// Code generated by type_s390x.go.tmpl. DO NOT EDIT. + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !noasm + +package math + +func initUint64Go() { + Uint64.sum = sum_uint64_go +} diff --git a/go/arrow/math/uint64_sse4_amd64.go b/go/arrow/math/uint64_sse4_amd64.go index 86c009b3fe060..30f33135eb49c 100644 --- a/go/arrow/math/uint64_sse4_amd64.go +++ b/go/arrow/math/uint64_sse4_amd64.go @@ -16,6 +16,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package math @@ -23,7 +24,7 @@ package math import ( "unsafe" - "github.com/apache/arrow/go/v9/arrow/array" + "github.com/apache/arrow/go/v10/arrow/array" ) //go:noescape diff --git a/go/arrow/math/uint64_test.go b/go/arrow/math/uint64_test.go index b34b44d7e73b8..7fd862bb92b62 100644 --- a/go/arrow/math/uint64_test.go +++ b/go/arrow/math/uint64_test.go @@ -21,9 +21,9 @@ package math_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/math" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/math" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/memory/buffer.go b/go/arrow/memory/buffer.go index 1d734adb49253..cbffc7c23f009 100644 --- a/go/arrow/memory/buffer.go +++ b/go/arrow/memory/buffer.go @@ -19,7 +19,7 @@ package memory import ( "sync/atomic" - "github.com/apache/arrow/go/v9/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/internal/debug" ) // Buffer is a wrapper type for a buffer of bytes. @@ -48,6 +48,10 @@ func SliceBuffer(buf *Buffer, offset, length int) *Buffer { return &Buffer{refCount: 1, parent: buf, buf: buf.Bytes()[offset : offset+length], length: length} } +// Parent returns either nil or a pointer to the parent buffer if this buffer +// was sliced from another. +func (b *Buffer) Parent() *Buffer { return b.parent } + // Retain increases the reference count by 1. 
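Usage sketch for the Parent accessor added above (a minimal standalone example; it assumes the patched module so that Buffer.Parent is available):

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow/memory"
)

func main() {
	parent := memory.NewBufferBytes([]byte("hello world"))
	child := memory.SliceBuffer(parent, 6, 5) // bytes "world"

	fmt.Println(child.Parent() == parent) // true: child was sliced from parent
	fmt.Println(parent.Parent() == nil)   // true: parent is not itself a slice

	child.Release()
}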
func (b *Buffer) Retain() { if b.mem != nil || b.parent != nil { diff --git a/go/arrow/memory/buffer_test.go b/go/arrow/memory/buffer_test.go index f83e61a53204a..7d1d435fdc914 100644 --- a/go/arrow/memory/buffer_test.go +++ b/go/arrow/memory/buffer_test.go @@ -19,7 +19,7 @@ package memory_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/memory/cgo_allocator.go b/go/arrow/memory/cgo_allocator.go index e827be4a3a201..0eb7145fd670c 100644 --- a/go/arrow/memory/cgo_allocator.go +++ b/go/arrow/memory/cgo_allocator.go @@ -22,7 +22,7 @@ package memory import ( "runtime" - cga "github.com/apache/arrow/go/v9/arrow/memory/internal/cgoalloc" + cga "github.com/apache/arrow/go/v10/arrow/memory/internal/cgoalloc" ) // CgoArrowAllocator is an allocator which exposes the C++ memory pool class diff --git a/go/arrow/memory/memory_test.go b/go/arrow/memory/memory_test.go index bbab3982459d6..b34cf5e860099 100644 --- a/go/arrow/memory/memory_test.go +++ b/go/arrow/memory/memory_test.go @@ -19,7 +19,7 @@ package memory_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/numeric.tmpldata b/go/arrow/numeric.tmpldata index 127a5a107e223..92cd48ba1078d 100644 --- a/go/arrow/numeric.tmpldata +++ b/go/arrow/numeric.tmpldata @@ -4,7 +4,10 @@ "name": "int64", "Type": "int64", "Default": "0", - "Size": "8" + "Size": "8", + "Opt": { + "BufferBuilder": true + } }, { "Name": "Uint64", @@ -63,7 +66,10 @@ "name": "int8", "Type": "int8", "Default": "0", - "Size": "1" + "Size": "1", + "Opt": { + "BufferBuilder": true + } }, { "Name": "Uint8", diff --git a/go/arrow/scalar/binary.go b/go/arrow/scalar/binary.go index 12d91775500c9..19ff685047552 100644 --- a/go/arrow/scalar/binary.go +++ b/go/arrow/scalar/binary.go @@ -21,8 +21,8 @@ import ( "fmt" "unicode/utf8" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/memory" ) type BinaryScalar interface { @@ -30,6 +30,7 @@ type BinaryScalar interface { Retain() Release() + Buffer() *memory.Buffer Data() []byte } @@ -46,6 +47,7 @@ func (b *Binary) Data() []byte { return b.Value.Bytes() } func (b *Binary) equals(rhs Scalar) bool { return bytes.Equal(b.Value.Bytes(), rhs.(BinaryScalar).Data()) } +func (b *Binary) Buffer() *memory.Buffer { return b.Value } func (b *Binary) String() string { if !b.Valid { return "null" @@ -61,9 +63,13 @@ func (b *Binary) CastTo(to arrow.DataType) (Scalar, error) { switch to.ID() { case arrow.BINARY: - return b, nil + return NewBinaryScalar(b.Value, b.Type), nil + case arrow.LARGE_BINARY: + return NewLargeBinaryScalar(b.Value), nil case arrow.STRING: return NewStringScalarFromBuffer(b.Value), nil + case arrow.LARGE_STRING: + return NewLargeStringScalarFromBuffer(b.Value), nil case arrow.FIXED_SIZE_BINARY: if b.Value.Len() == to.(*arrow.FixedSizeBinaryType).ByteWidth { return NewFixedSizeBinaryScalar(b.Value, to), nil @@ -86,9 +92,18 @@ func (b *Binary) ValidateFull() error { } func NewBinaryScalar(val *memory.Buffer, typ arrow.DataType) *Binary { + val.Retain() return &Binary{scalar{typ, true}, val} } +type LargeBinary struct { + *Binary +} + +func NewLargeBinaryScalar(val *memory.Buffer) *LargeBinary { + return &LargeBinary{NewBinaryScalar(val, arrow.BinaryTypes.LargeBinary)} +} + 
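Because NewBinaryScalar now retains the buffer it is handed, constructors built on it (including the new LargeBinary and LargeString ones) leave ownership with the caller, who releases only the reference it created. A small usage sketch assuming the patched module; Retain/Release are no-ops for an unmanaged NewBufferBytes buffer, but the pattern matters for allocator-backed ones:

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow/memory"
	"github.com/apache/arrow/go/v10/arrow/scalar"
)

func main() {
	buf := memory.NewBufferBytes([]byte("payload"))
	s := scalar.NewLargeBinaryScalar(buf) // retains buf internally
	buf.Release()                         // drop the caller's reference; s still holds its own

	fmt.Println(s.String()) // the scalar still sees "payload"
	s.Release()
}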
type String struct { *Binary } @@ -129,10 +144,29 @@ func NewStringScalar(val string) *String { } func NewStringScalarFromBuffer(val *memory.Buffer) *String { - val.Retain() + // NewBinaryScalar will call Retain on val, so we don't have to return &String{NewBinaryScalar(val, arrow.BinaryTypes.String)} } +// alias the String struct we are embedding so it doesn't hide the +// String() function that we want to expose +type stringScalar = String + +type LargeString struct { + *stringScalar +} + +func NewLargeStringScalar(val string) *LargeString { + buf := memory.NewBufferBytes([]byte(val)) + defer buf.Release() + return NewLargeStringScalarFromBuffer(buf) +} + +func NewLargeStringScalarFromBuffer(val *memory.Buffer) *LargeString { + // NewBinaryScalar will call retain on val, so we don't have to + return &LargeString{stringScalar: &String{NewBinaryScalar(val, arrow.BinaryTypes.LargeString)}} +} + type FixedSizeBinary struct { *Binary } @@ -154,6 +188,6 @@ func (b *FixedSizeBinary) Validate() (err error) { func (b *FixedSizeBinary) ValidateFull() error { return b.Validate() } func NewFixedSizeBinaryScalar(val *memory.Buffer, typ arrow.DataType) *FixedSizeBinary { - val.Retain() + // NewBinaryScalar will call Retain on val, so we don't have to return &FixedSizeBinary{NewBinaryScalar(val, typ)} } diff --git a/go/arrow/scalar/compare.go b/go/arrow/scalar/compare.go index d83284a91de06..d3d5aa1041a4a 100644 --- a/go/arrow/scalar/compare.go +++ b/go/arrow/scalar/compare.go @@ -16,7 +16,7 @@ package scalar -import "github.com/apache/arrow/go/v9/arrow" +import "github.com/apache/arrow/go/v10/arrow" //TODO(zeroshade): approxequals // tracked in https://issues.apache.org/jira/browse/ARROW-13980 diff --git a/go/arrow/scalar/nested.go b/go/arrow/scalar/nested.go index 0a3bedad0bb7a..4a2e99bcf3826 100644 --- a/go/arrow/scalar/nested.go +++ b/go/arrow/scalar/nested.go @@ -21,10 +21,10 @@ import ( "errors" "fmt" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" "golang.org/x/xerrors" ) @@ -66,6 +66,8 @@ func (l *List) Validate() (err error) { switch dt := l.Type.(type) { case *arrow.ListType: valueType = dt.Elem() + case *arrow.LargeListType: + valueType = dt.Elem() case *arrow.FixedSizeListType: valueType = dt.Elem() case *arrow.MapType: @@ -120,6 +122,18 @@ func NewListScalarData(val arrow.ArrayData) *List { return &List{scalar{arrow.ListOf(val.DataType()), true}, array.MakeFromData(val)} } +type LargeList struct { + *List +} + +func NewLargeListScalar(val arrow.Array) *LargeList { + return &LargeList{&List{scalar{arrow.LargeListOf(val.DataType()), true}, array.MakeFromData(val.Data())}} +} + +func NewLargeListScalarData(val arrow.ArrayData) *LargeList { + return &LargeList{&List{scalar{arrow.LargeListOf(val.DataType()), true}, array.MakeFromData(val)}} +} + func makeMapType(typ *arrow.StructType) *arrow.MapType { debug.Assert(len(typ.Fields()) == 2, "must pass struct with only 2 fields for MapScalar") return arrow.MapOf(typ.Field(0).Type, typ.Field(1).Type) @@ -348,6 +362,8 @@ func NewDictScalar(index Scalar, dict arrow.Array) *Dictionary { return ret } +func (s *Dictionary) Data() []byte { return s.Value.Index.(PrimitiveScalar).Data() } + func (s *Dictionary) Retain() { if r, ok 
:= s.Value.Index.(Releasable); ok { r.Retain() @@ -506,3 +522,195 @@ func (s *Dictionary) GetEncodedValue() (Scalar, error) { func (s *Dictionary) value() interface{} { return s.Value.Index.value() } + +type Union interface { + Scalar + ChildValue() Scalar + Release() +} + +type SparseUnion struct { + scalar + + TypeCode arrow.UnionTypeCode + Value []Scalar + ChildID int +} + +func (s *SparseUnion) equals(rhs Scalar) bool { + right := rhs.(*SparseUnion) + return Equals(s.ChildValue(), right.ChildValue()) +} + +func (s *SparseUnion) value() interface{} { return s.ChildValue() } + +func (s *SparseUnion) String() string { + dt := s.Type.(*arrow.SparseUnionType) + val := s.ChildValue() + return "union{" + dt.Fields()[dt.ChildIDs()[s.TypeCode]].String() + " = " + val.String() + "}" +} + +func (s *SparseUnion) Release() { + for _, v := range s.Value { + if v, ok := v.(Releasable); ok { + v.Release() + } + } +} + +func (s *SparseUnion) Validate() (err error) { + dt := s.Type.(*arrow.SparseUnionType) + if len(dt.Fields()) != len(s.Value) { + return fmt.Errorf("sparse union scalar value had %d fields but type has %d fields", len(dt.Fields()), len(s.Value)) + } + + if s.TypeCode < 0 || int(s.TypeCode) >= len(dt.ChildIDs()) || dt.ChildIDs()[s.TypeCode] == arrow.InvalidUnionChildID { + return fmt.Errorf("%s scalar has invalid type code %d", dt, s.TypeCode) + } + + for i, f := range dt.Fields() { + v := s.Value[i] + if !arrow.TypeEqual(f.Type, v.DataType()) { + return fmt.Errorf("%s value for field %s had incorrect type of %s", dt, f, v.DataType()) + } + if err = v.Validate(); err != nil { + return err + } + } + return +} + +func (s *SparseUnion) ValidateFull() (err error) { + dt := s.Type.(*arrow.SparseUnionType) + if len(dt.Fields()) != len(s.Value) { + return fmt.Errorf("sparse union scalar value had %d fields but type has %d fields", len(dt.Fields()), len(s.Value)) + } + + if s.TypeCode < 0 || int(s.TypeCode) >= len(dt.ChildIDs()) || dt.ChildIDs()[s.TypeCode] == arrow.InvalidUnionChildID { + return fmt.Errorf("%s scalar has invalid type code %d", dt, s.TypeCode) + } + + for i, f := range dt.Fields() { + v := s.Value[i] + if !arrow.TypeEqual(f.Type, v.DataType()) { + return fmt.Errorf("%s value for field %s had incorrect type of %s", dt, f, v.DataType()) + } + if err = v.ValidateFull(); err != nil { + return err + } + } + return +} + +func (s *SparseUnion) CastTo(to arrow.DataType) (Scalar, error) { + if !s.Valid { + return MakeNullScalar(to), nil + } + + switch to.ID() { + case arrow.STRING: + return NewStringScalar(s.String()), nil + case arrow.LARGE_STRING: + return NewLargeStringScalar(s.String()), nil + } + + return nil, fmt.Errorf("cannot cast non-nil union to type other than string") +} + +func (s *SparseUnion) ChildValue() Scalar { return s.Value[s.ChildID] } + +func NewSparseUnionScalar(val []Scalar, code arrow.UnionTypeCode, dt *arrow.SparseUnionType) *SparseUnion { + ret := &SparseUnion{ + scalar: scalar{dt, true}, + TypeCode: code, + Value: val, + ChildID: dt.ChildIDs()[code], + } + ret.Valid = ret.Value[ret.ChildID].IsValid() + return ret +} + +func NewSparseUnionScalarFromValue(val Scalar, idx int, dt *arrow.SparseUnionType) *SparseUnion { + code := dt.TypeCodes()[idx] + values := make([]Scalar, len(dt.Fields())) + for i, f := range dt.Fields() { + if i == idx { + values[i] = val + } else { + values[i] = MakeNullScalar(f.Type) + } + } + return NewSparseUnionScalar(values, code, dt) +} + +type DenseUnion struct { + scalar + + TypeCode arrow.UnionTypeCode + Value Scalar +} + +func (s 
*DenseUnion) equals(rhs Scalar) bool { + right := rhs.(*DenseUnion) + return Equals(s.Value, right.Value) +} + +func (s *DenseUnion) value() interface{} { return s.ChildValue() } + +func (s *DenseUnion) String() string { + dt := s.Type.(*arrow.DenseUnionType) + return "union{" + dt.Fields()[dt.ChildIDs()[s.TypeCode]].String() + " = " + s.Value.String() + "}" +} + +func (s *DenseUnion) Release() { + if v, ok := s.Value.(Releasable); ok { + v.Release() + } +} + +func (s *DenseUnion) Validate() (err error) { + dt := s.Type.(*arrow.DenseUnionType) + if s.TypeCode < 0 || int(s.TypeCode) >= len(dt.ChildIDs()) || dt.ChildIDs()[s.TypeCode] == arrow.InvalidUnionChildID { + return fmt.Errorf("%s scalar has invalid type code %d", dt, s.TypeCode) + } + fieldType := dt.Fields()[dt.ChildIDs()[s.TypeCode]].Type + if !arrow.TypeEqual(fieldType, s.Value.DataType()) { + return fmt.Errorf("%s scalar with type code %d should have an underlying value of type %s, got %s", + s.Type, s.TypeCode, fieldType, s.Value.DataType()) + } + return s.Value.Validate() +} + +func (s *DenseUnion) ValidateFull() error { + dt := s.Type.(*arrow.DenseUnionType) + if s.TypeCode < 0 || int(s.TypeCode) >= len(dt.ChildIDs()) || dt.ChildIDs()[s.TypeCode] == arrow.InvalidUnionChildID { + return fmt.Errorf("%s scalar has invalid type code %d", dt, s.TypeCode) + } + fieldType := dt.Fields()[dt.ChildIDs()[s.TypeCode]].Type + if !arrow.TypeEqual(fieldType, s.Value.DataType()) { + return fmt.Errorf("%s scalar with type code %d should have an underlying value of type %s, got %s", + s.Type, s.TypeCode, fieldType, s.Value.DataType()) + } + return s.Value.ValidateFull() +} + +func (s *DenseUnion) CastTo(to arrow.DataType) (Scalar, error) { + if !s.Valid { + return MakeNullScalar(to), nil + } + + switch to.ID() { + case arrow.STRING: + return NewStringScalar(s.String()), nil + case arrow.LARGE_STRING: + return NewLargeStringScalar(s.String()), nil + } + + return nil, fmt.Errorf("cannot cast non-nil union to type other than string") +} + +func (s *DenseUnion) ChildValue() Scalar { return s.Value } + +func NewDenseUnionScalar(v Scalar, code arrow.UnionTypeCode, dt *arrow.DenseUnionType) *DenseUnion { + return &DenseUnion{scalar: scalar{dt, v.IsValid()}, TypeCode: code, Value: v} +} diff --git a/go/arrow/scalar/numeric.gen.go b/go/arrow/scalar/numeric.gen.go index 2d876a4dd77ff..9d743f5e9de80 100644 --- a/go/arrow/scalar/numeric.gen.go +++ b/go/arrow/scalar/numeric.gen.go @@ -23,7 +23,7 @@ import ( "reflect" "unsafe" - "github.com/apache/arrow/go/v9/arrow" + "github.com/apache/arrow/go/v10/arrow" ) type Int8 struct { @@ -81,6 +81,8 @@ func (s *Int8) CastTo(dt arrow.DataType) (Scalar, error) { return NewMonthIntervalScalar(arrow.MonthInterval(s.Value)), nil case *arrow.StringType: return NewStringScalar(fmt.Sprintf("%v", s.Value)), nil + case *arrow.LargeStringType: + return NewLargeStringScalar(fmt.Sprintf("%v", s.Value)), nil } return nil, fmt.Errorf("invalid scalar cast from type int8 to type %s", dt) @@ -145,6 +147,8 @@ func (s *Int16) CastTo(dt arrow.DataType) (Scalar, error) { return NewMonthIntervalScalar(arrow.MonthInterval(s.Value)), nil case *arrow.StringType: return NewStringScalar(fmt.Sprintf("%v", s.Value)), nil + case *arrow.LargeStringType: + return NewLargeStringScalar(fmt.Sprintf("%v", s.Value)), nil } return nil, fmt.Errorf("invalid scalar cast from type int16 to type %s", dt) @@ -209,6 +213,8 @@ func (s *Int32) CastTo(dt arrow.DataType) (Scalar, error) { return NewMonthIntervalScalar(arrow.MonthInterval(s.Value)), nil case 
*arrow.StringType: return NewStringScalar(fmt.Sprintf("%v", s.Value)), nil + case *arrow.LargeStringType: + return NewLargeStringScalar(fmt.Sprintf("%v", s.Value)), nil } return nil, fmt.Errorf("invalid scalar cast from type int32 to type %s", dt) @@ -273,6 +279,8 @@ func (s *Int64) CastTo(dt arrow.DataType) (Scalar, error) { return NewMonthIntervalScalar(arrow.MonthInterval(s.Value)), nil case *arrow.StringType: return NewStringScalar(fmt.Sprintf("%v", s.Value)), nil + case *arrow.LargeStringType: + return NewLargeStringScalar(fmt.Sprintf("%v", s.Value)), nil } return nil, fmt.Errorf("invalid scalar cast from type int64 to type %s", dt) @@ -337,6 +345,8 @@ func (s *Uint8) CastTo(dt arrow.DataType) (Scalar, error) { return NewMonthIntervalScalar(arrow.MonthInterval(s.Value)), nil case *arrow.StringType: return NewStringScalar(fmt.Sprintf("%v", s.Value)), nil + case *arrow.LargeStringType: + return NewLargeStringScalar(fmt.Sprintf("%v", s.Value)), nil } return nil, fmt.Errorf("invalid scalar cast from type uint8 to type %s", dt) @@ -401,6 +411,8 @@ func (s *Uint16) CastTo(dt arrow.DataType) (Scalar, error) { return NewMonthIntervalScalar(arrow.MonthInterval(s.Value)), nil case *arrow.StringType: return NewStringScalar(fmt.Sprintf("%v", s.Value)), nil + case *arrow.LargeStringType: + return NewLargeStringScalar(fmt.Sprintf("%v", s.Value)), nil } return nil, fmt.Errorf("invalid scalar cast from type uint16 to type %s", dt) @@ -465,6 +477,8 @@ func (s *Uint32) CastTo(dt arrow.DataType) (Scalar, error) { return NewMonthIntervalScalar(arrow.MonthInterval(s.Value)), nil case *arrow.StringType: return NewStringScalar(fmt.Sprintf("%v", s.Value)), nil + case *arrow.LargeStringType: + return NewLargeStringScalar(fmt.Sprintf("%v", s.Value)), nil } return nil, fmt.Errorf("invalid scalar cast from type uint32 to type %s", dt) @@ -529,6 +543,8 @@ func (s *Uint64) CastTo(dt arrow.DataType) (Scalar, error) { return NewMonthIntervalScalar(arrow.MonthInterval(s.Value)), nil case *arrow.StringType: return NewStringScalar(fmt.Sprintf("%v", s.Value)), nil + case *arrow.LargeStringType: + return NewLargeStringScalar(fmt.Sprintf("%v", s.Value)), nil } return nil, fmt.Errorf("invalid scalar cast from type uint64 to type %s", dt) @@ -593,6 +609,8 @@ func (s *Float32) CastTo(dt arrow.DataType) (Scalar, error) { return NewMonthIntervalScalar(arrow.MonthInterval(s.Value)), nil case *arrow.StringType: return NewStringScalar(fmt.Sprintf("%v", s.Value)), nil + case *arrow.LargeStringType: + return NewLargeStringScalar(fmt.Sprintf("%v", s.Value)), nil } return nil, fmt.Errorf("invalid scalar cast from type float32 to type %s", dt) @@ -657,6 +675,8 @@ func (s *Float64) CastTo(dt arrow.DataType) (Scalar, error) { return NewMonthIntervalScalar(arrow.MonthInterval(s.Value)), nil case *arrow.StringType: return NewStringScalar(fmt.Sprintf("%v", s.Value)), nil + case *arrow.LargeStringType: + return NewLargeStringScalar(fmt.Sprintf("%v", s.Value)), nil } return nil, fmt.Errorf("invalid scalar cast from type float64 to type %s", dt) diff --git a/go/arrow/scalar/numeric.gen.go.tmpl b/go/arrow/scalar/numeric.gen.go.tmpl index dc1a54586ab9b..07df9fc1d4b15 100644 --- a/go/arrow/scalar/numeric.gen.go.tmpl +++ b/go/arrow/scalar/numeric.gen.go.tmpl @@ -72,6 +72,8 @@ func (s *{{.Name}}) CastTo(dt arrow.DataType) (Scalar, error) { return NewMonthIntervalScalar(arrow.MonthInterval(s.Value)), nil case *arrow.StringType: return NewStringScalar(fmt.Sprintf("%v", s.Value)), nil + case *arrow.LargeStringType: + return 
NewLargeStringScalar(fmt.Sprintf("%v", s.Value)), nil } return nil, fmt.Errorf("invalid scalar cast from type {{.Type}} to type %s", dt) diff --git a/go/arrow/scalar/numeric.gen_test.go b/go/arrow/scalar/numeric.gen_test.go index 3b4082a96a0d1..67e4f6b4306d7 100644 --- a/go/arrow/scalar/numeric.gen_test.go +++ b/go/arrow/scalar/numeric.gen_test.go @@ -21,8 +21,8 @@ package scalar_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/scalar" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/scalar" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/scalar/numeric.gen_test.go.tmpl b/go/arrow/scalar/numeric.gen_test.go.tmpl index fe43dfbdfde0e..b62eec4ee3bd0 100644 --- a/go/arrow/scalar/numeric.gen_test.go.tmpl +++ b/go/arrow/scalar/numeric.gen_test.go.tmpl @@ -19,8 +19,8 @@ package scalar_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/scalar" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/scalar" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/scalar/parse.go b/go/arrow/scalar/parse.go index 9680589bd28fc..8361362a478b9 100644 --- a/go/arrow/scalar/parse.go +++ b/go/arrow/scalar/parse.go @@ -24,10 +24,12 @@ import ( "strconv" "time" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/float16" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/decimal256" + "github.com/apache/arrow/go/v10/arrow/float16" + "github.com/apache/arrow/go/v10/arrow/memory" ) type TypeToScalar interface { @@ -375,8 +377,12 @@ func MakeScalarParam(val interface{}, dt arrow.DataType) (Scalar, error) { switch dt.ID() { case arrow.BINARY: return NewBinaryScalar(buf, dt), nil + case arrow.LARGE_BINARY: + return NewLargeBinaryScalar(buf), nil case arrow.STRING: return NewStringScalarFromBuffer(buf), nil + case arrow.LARGE_STRING: + return NewLargeStringScalarFromBuffer(buf), nil case arrow.FIXED_SIZE_BINARY: if buf.Len() == dt.(*arrow.FixedSizeBinaryType).ByteWidth { return NewFixedSizeBinaryScalar(buf, dt), nil @@ -387,8 +393,12 @@ func MakeScalarParam(val interface{}, dt arrow.DataType) (Scalar, error) { switch dt.ID() { case arrow.BINARY: return NewBinaryScalar(v, dt), nil + case arrow.LARGE_BINARY: + return NewLargeBinaryScalar(v), nil case arrow.STRING: return NewStringScalarFromBuffer(v), nil + case arrow.LARGE_STRING: + return NewLargeStringScalarFromBuffer(v), nil case arrow.FIXED_SIZE_BINARY: if v.Len() == dt.(*arrow.FixedSizeBinaryType).ByteWidth { return NewFixedSizeBinaryScalar(v, dt), nil @@ -408,6 +418,11 @@ func MakeScalarParam(val interface{}, dt arrow.DataType) (Scalar, error) { return nil, fmt.Errorf("inconsistent type for list scalar array and data type") } return NewListScalar(v), nil + case arrow.LARGE_LIST: + if !arrow.TypeEqual(v.DataType(), dt.(*arrow.LargeListType).Elem()) { + return nil, fmt.Errorf("inconsistent type for large list scalar array and data type") + } + return NewLargeListScalar(v), nil case arrow.FIXED_SIZE_LIST: if !arrow.TypeEqual(v.DataType(), dt.(*arrow.FixedSizeListType).Elem()) { return nil, fmt.Errorf("inconsistent type for list scalar array and data type") @@ -419,6 +434,19 @@ func MakeScalarParam(val interface{}, dt arrow.DataType) (Scalar, 
error) { } return NewMapScalar(v), nil } + case decimal128.Num: + if _, ok := dt.(*arrow.Decimal128Type); !ok { + return nil, fmt.Errorf("mismatch cannot create decimal128 scalar with incorrect data type") + } + + return NewDecimal128Scalar(v, dt), nil + case decimal256.Num: + if _, ok := dt.(*arrow.Decimal256Type); !ok { + return nil, fmt.Errorf("mismatch cannot create decimal256 scalar with incorrect data type") + } + + return NewDecimal256Scalar(v, dt), nil + } if arrow.IsInteger(dt.ID()) { diff --git a/go/arrow/scalar/scalar.go b/go/arrow/scalar/scalar.go index db1ec4da4481c..a35eb519bad5f 100644 --- a/go/arrow/scalar/scalar.go +++ b/go/arrow/scalar/scalar.go @@ -26,14 +26,15 @@ import ( "strconv" "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/decimal128" - "github.com/apache/arrow/go/v9/arrow/endian" - "github.com/apache/arrow/go/v9/arrow/float16" - "github.com/apache/arrow/go/v9/arrow/internal/debug" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/decimal256" + "github.com/apache/arrow/go/v10/arrow/endian" + "github.com/apache/arrow/go/v10/arrow/float16" + "github.com/apache/arrow/go/v10/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/memory" "golang.org/x/xerrors" ) @@ -272,6 +273,10 @@ type Decimal128 struct { Value decimal128.Num } +func (s *Decimal128) Data() []byte { + return (*[arrow.Decimal128SizeBytes]byte)(unsafe.Pointer(&s.Value))[:] +} + func (s *Decimal128) value() interface{} { return s.Value } func (s *Decimal128) String() string { @@ -294,9 +299,29 @@ func (s *Decimal128) CastTo(to arrow.DataType) (Scalar, error) { return MakeNullScalar(to), nil } + dt := s.Type.(*arrow.Decimal128Type) + switch to.ID() { case arrow.DECIMAL128: - return NewDecimal128Scalar(s.Value, to), nil + to := to.(*arrow.Decimal128Type) + newVal, err := s.Value.Rescale(dt.Scale, to.Scale) + if err != nil { + return nil, err + } + if !newVal.FitsInPrecision(to.Precision) { + return nil, fmt.Errorf("decimal128 value %v will not fit in new precision %d", newVal, to.Precision) + } + return NewDecimal128Scalar(newVal, to), nil + case arrow.DECIMAL256: + to := to.(*arrow.Decimal256Type) + newVal, err := decimal256.FromDecimal128(s.Value).Rescale(dt.Scale, to.Scale) + if err != nil { + return nil, err + } + if !newVal.FitsInPrecision(to.Precision) { + return nil, fmt.Errorf("decimal256 value %v will not fit in new precision %d", newVal, to.Precision) + } + return NewDecimal256Scalar(newVal, to), nil case arrow.STRING: dt := s.Type.(*arrow.Decimal128Type) scale := big.NewFloat(math.Pow10(int(dt.Scale))) @@ -311,6 +336,63 @@ func NewDecimal128Scalar(val decimal128.Num, typ arrow.DataType) *Decimal128 { return &Decimal128{scalar{typ, true}, val} } +type Decimal256 struct { + scalar + Value decimal256.Num +} + +func (s *Decimal256) Data() []byte { + return (*[arrow.Decimal256SizeBytes]byte)(unsafe.Pointer(&s.Value))[:] +} + +func (s *Decimal256) value() interface{} { return s.Value } + +func (s *Decimal256) String() string { + if !s.Valid { + return "null" + } + val, err := s.CastTo(arrow.BinaryTypes.String) + if err != nil { + return "..." 
+ } + return string(val.(*String).Value.Bytes()) +} + +func (s *Decimal256) equals(rhs Scalar) bool { + return s.Value == rhs.(*Decimal256).Value +} + +func (s *Decimal256) CastTo(to arrow.DataType) (Scalar, error) { + if !s.Valid { + return MakeNullScalar(to), nil + } + + dt := s.Type.(*arrow.Decimal256Type) + + switch to.ID() { + case arrow.DECIMAL256: + to := to.(*arrow.Decimal256Type) + newVal, err := s.Value.Rescale(dt.Scale, to.Scale) + if err != nil { + return nil, err + } + if !newVal.FitsInPrecision(to.Precision) { + return nil, fmt.Errorf("decimal256 value %v will not fit in new precision %d", newVal, to.Precision) + } + return NewDecimal256Scalar(newVal, to), nil + case arrow.STRING: + scale := big.NewFloat(math.Pow10(int(dt.Scale))) + val := (&big.Float{}).SetInt(s.Value.BigInt()) + return NewStringScalar(val.Quo(val, scale).Text('g', int(dt.Precision))), nil + } + + return nil, fmt.Errorf("cannot cast non-nil decimal128 scalar to type %s", to) +} + +func NewDecimal256Scalar(val decimal256.Num, typ arrow.DataType) *Decimal256 { + return &Decimal256{scalar{typ, true}, val} +} + type Extension struct { scalar Value Scalar @@ -392,10 +474,6 @@ func MakeNullScalar(dt arrow.DataType) Scalar { return makeNullFn[byte(dt.ID()&0x3f)](dt) } -func unsupportedScalarType(dt arrow.DataType) Scalar { - panic("unsupported scalar data type: " + dt.ID().String()) -} - func invalidScalarType(dt arrow.DataType) Scalar { panic("invalid scalar type: " + dt.ID().String()) } @@ -442,17 +520,33 @@ func init() { arrow.DECIMAL128: func(dt arrow.DataType) Scalar { return &Decimal128{scalar: scalar{dt, false}} }, arrow.LIST: func(dt arrow.DataType) Scalar { return &List{scalar: scalar{dt, false}} }, arrow.STRUCT: func(dt arrow.DataType) Scalar { return &Struct{scalar: scalar{dt, false}} }, - arrow.SPARSE_UNION: unsupportedScalarType, - arrow.DENSE_UNION: unsupportedScalarType, - arrow.DICTIONARY: func(dt arrow.DataType) Scalar { return NewNullDictScalar(dt) }, - arrow.LARGE_STRING: unsupportedScalarType, - arrow.LARGE_BINARY: unsupportedScalarType, - arrow.LARGE_LIST: unsupportedScalarType, - arrow.DECIMAL256: unsupportedScalarType, - arrow.MAP: func(dt arrow.DataType) Scalar { return &Map{&List{scalar: scalar{dt, false}}} }, - arrow.EXTENSION: func(dt arrow.DataType) Scalar { return &Extension{scalar: scalar{dt, false}} }, - arrow.FIXED_SIZE_LIST: func(dt arrow.DataType) Scalar { return &FixedSizeList{&List{scalar: scalar{dt, false}}} }, - arrow.DURATION: func(dt arrow.DataType) Scalar { return &Duration{scalar: scalar{dt, false}} }, + arrow.SPARSE_UNION: func(dt arrow.DataType) Scalar { + typ := dt.(*arrow.SparseUnionType) + if len(typ.Fields()) == 0 { + panic("cannot make scalar of empty union type") + } + values := make([]Scalar, len(typ.Fields())) + for i, f := range typ.Fields() { + values[i] = MakeNullScalar(f.Type) + } + return NewSparseUnionScalar(values, typ.TypeCodes()[0], typ) + }, + arrow.DENSE_UNION: func(dt arrow.DataType) Scalar { + typ := dt.(*arrow.DenseUnionType) + if len(typ.Fields()) == 0 { + panic("cannot make scalar of empty union type") + } + return NewDenseUnionScalar(MakeNullScalar(typ.Fields()[0].Type), typ.TypeCodes()[0], typ) + }, + arrow.DICTIONARY: func(dt arrow.DataType) Scalar { return NewNullDictScalar(dt) }, + arrow.LARGE_STRING: func(dt arrow.DataType) Scalar { return &LargeString{&String{&Binary{scalar: scalar{dt, false}}}} }, + arrow.LARGE_BINARY: func(dt arrow.DataType) Scalar { return &LargeBinary{&Binary{scalar: scalar{dt, false}}} }, + arrow.LARGE_LIST: func(dt 
arrow.DataType) Scalar { return &LargeList{&List{scalar: scalar{dt, false}}} }, + arrow.DECIMAL256: func(dt arrow.DataType) Scalar { return &Decimal256{scalar: scalar{dt, false}} }, + arrow.MAP: func(dt arrow.DataType) Scalar { return &Map{&List{scalar: scalar{dt, false}}} }, + arrow.EXTENSION: func(dt arrow.DataType) Scalar { return &Extension{scalar: scalar{dt, false}} }, + arrow.FIXED_SIZE_LIST: func(dt arrow.DataType) Scalar { return &FixedSizeList{&List{scalar: scalar{dt, false}}} }, + arrow.DURATION: func(dt arrow.DataType) Scalar { return &Duration{scalar: scalar{dt, false}} }, // invalid data types to fill out array size 2^6 - 1 63: invalidScalarType, } @@ -485,6 +579,8 @@ func GetScalar(arr arrow.Array, idx int) (Scalar, error) { return NewDayTimeIntervalScalar(arr.Value(idx)), nil case *array.Decimal128: return NewDecimal128Scalar(arr.Value(idx), arr.DataType()), nil + case *array.Decimal256: + return NewDecimal256Scalar(arr.Value(idx), arr.DataType()), nil case *array.Duration: return NewDurationScalar(arr.Value(idx), arr.DataType()), nil case array.ExtensionArray: @@ -570,12 +666,47 @@ func GetScalar(arr arrow.Array, idx int) (Scalar, error) { scalar.Value.Dict = arr.Dictionary() scalar.Value.Dict.Retain() return scalar, nil + case *array.SparseUnion: + var err error + typeCode := arr.TypeCode(idx) + children := make([]Scalar, arr.NumFields()) + defer func() { + if err != nil { + for _, c := range children { + if c == nil { + break + } + + if v, ok := c.(Releasable); ok { + v.Release() + } + } + } + }() + + for i := range arr.UnionType().Fields() { + if children[i], err = GetScalar(arr.Field(i), idx); err != nil { + return nil, err + } + } + return NewSparseUnionScalar(children, typeCode, arr.UnionType().(*arrow.SparseUnionType)), nil + case *array.DenseUnion: + typeCode := arr.TypeCode(idx) + child := arr.Field(arr.ChildID(idx)) + offset := arr.ValueOffset(idx) + value, err := GetScalar(child, int(offset)) + if err != nil { + return nil, err + } + return NewDenseUnionScalar(value, typeCode, arr.UnionType().(*arrow.DenseUnionType)), nil } return nil, fmt.Errorf("cannot create scalar from array of type %s", arr.DataType()) } // MakeArrayOfNull creates an array of size length which is all null of the given data type. 
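With the union cases added to GetScalar above, a scalar can now be pulled out of a dense union array directly: the entry's type code selects the child and the value offset indexes into it. A standalone sketch assuming the patched module, built with the same construction helpers used by the tests further below:

package main

import (
	"fmt"
	"strings"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
	"github.com/apache/arrow/go/v10/arrow/scalar"
)

func main() {
	mem := memory.DefaultAllocator
	dt := arrow.UnionOf(arrow.DenseMode, []arrow.Field{
		{Name: "string", Type: arrow.BinaryTypes.String, Nullable: true},
		{Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true},
	}, []arrow.UnionTypeCode{3, 42}).(*arrow.DenseUnionType)

	strs, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["alpha", "beta"]`))
	nums, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Uint64, strings.NewReader(`[2]`))
	typeIDs, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[3, 42, 3]`))
	offsets, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[0, 0, 1]`))

	arr := array.NewDenseUnion(dt, 3, []arrow.Array{strs, nums},
		typeIDs.Data().Buffers()[1], offsets.Data().Buffers()[1], 0)

	// slot 1 carries type code 42 and value offset 0, so GetScalar resolves
	// it to the first element of the uint64 child.
	s, err := scalar.GetScalar(arr, 1)
	if err != nil {
		panic(err)
	}
	fmt.Println(s.(*scalar.DenseUnion).ChildValue()) // the uint64 scalar holding 2
}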
+// +// Deprecated: Use array.MakeArrayOfNull func MakeArrayOfNull(dt arrow.DataType, length int, mem memory.Allocator) arrow.Array { var ( buffers = []*memory.Buffer{nil} @@ -673,14 +804,18 @@ func MakeArrayFromScalar(sc Scalar, length int, mem memory.Allocator) (arrow.Arr data.Release() }() return array.MakeFromData(data), nil - case PrimitiveScalar: - data := finishFixedWidth(s.Data()) - defer data.Release() - return array.MakeFromData(data), nil case *Decimal128: data := finishFixedWidth(arrow.Decimal128Traits.CastToBytes([]decimal128.Num{s.Value})) defer data.Release() return array.MakeFromData(data), nil + case *Decimal256: + data := finishFixedWidth(arrow.Decimal256Traits.CastToBytes([]decimal256.Num{s.Value})) + defer data.Release() + return array.MakeFromData(data), nil + case PrimitiveScalar: + data := finishFixedWidth(s.Data()) + defer data.Release() + return array.MakeFromData(data), nil case *List: values := make([]arrow.Array, length) for i := range values { @@ -803,6 +938,15 @@ func Hash(seed maphash.Seed, s Scalar) uint64 { binary.Write(&h, endian.Native, v.LowBits()) hash() binary.Write(&h, endian.Native, uint64(v.HighBits())) + case decimal256.Num: + arr := v.Array() + binary.Write(&h, endian.Native, arr[3]) + hash() + binary.Write(&h, endian.Native, arr[2]) + hash() + binary.Write(&h, endian.Native, arr[1]) + hash() + binary.Write(&h, endian.Native, arr[0]) } hash() return out @@ -817,6 +961,16 @@ func Hash(seed maphash.Seed, s Scalar) uint64 { return valueHash(s.Value.Days) & valueHash(s.Value.Milliseconds) case *MonthDayNanoInterval: return valueHash(s.Value.Months) & valueHash(s.Value.Days) & valueHash(s.Value.Nanoseconds) + case *SparseUnion: + // typecode is ignored when comparing for equality, so don't hash it either + out ^= Hash(seed, s.Value[s.ChildID]) + case *DenseUnion: + // typecode is ignored when comparing equality, so don't hash it either + out ^= Hash(seed, s.Value) + case *Dictionary: + if s.Value.Index.IsValid() { + out ^= Hash(seed, s.Value.Index) + } case PrimitiveScalar: h.Write(s.Data()) hash() @@ -831,10 +985,6 @@ func Hash(seed maphash.Seed, s Scalar) uint64 { out ^= Hash(seed, c) } } - case *Dictionary: - if s.Value.Index.IsValid() { - out ^= Hash(seed, s.Value.Index) - } } return out diff --git a/go/arrow/scalar/scalar_test.go b/go/arrow/scalar/scalar_test.go index 9b4f458c44dcb..7b05cf456887a 100644 --- a/go/arrow/scalar/scalar_test.go +++ b/go/arrow/scalar/scalar_test.go @@ -25,11 +25,11 @@ import ( "testing" "time" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/decimal128" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/arrow/scalar" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/scalar" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) @@ -1143,3 +1143,276 @@ func TestDictionaryScalarValidateErrors(t *testing.T) { assert.Error(t, invalid.ValidateFull()) } } + +func checkGetValidUnionScalar(t *testing.T, arr arrow.Array, idx int, expected, expectedValue scalar.Scalar) { + s, err := scalar.GetScalar(arr, idx) + assert.NoError(t, err) + assert.NoError(t, s.ValidateFull()) + assert.True(t, scalar.Equals(expected, s)) + + assert.True(t, s.IsValid()) + assert.True(t, scalar.Equals(s.(scalar.Union).ChildValue(), expectedValue), s, 
expectedValue) +} + +func checkGetNullUnionScalar(t *testing.T, arr arrow.Array, idx int) { + s, err := scalar.GetScalar(arr, idx) + assert.NoError(t, err) + assert.True(t, scalar.Equals(scalar.MakeNullScalar(arr.DataType()), s)) + assert.False(t, s.IsValid()) + assert.False(t, s.(scalar.Union).ChildValue().IsValid()) +} + +func makeSparseUnionScalar(ty *arrow.SparseUnionType, val scalar.Scalar, idx int) scalar.Scalar { + return scalar.NewSparseUnionScalarFromValue(val, idx, ty) +} + +func makeDenseUnionScalar(ty *arrow.DenseUnionType, val scalar.Scalar, idx int) scalar.Scalar { + return scalar.NewDenseUnionScalar(val, ty.TypeCodes()[idx], ty) +} + +func makeSpecificNullScalar(dt arrow.UnionType, idx int) scalar.Scalar { + switch dt.Mode() { + case arrow.SparseMode: + values := make([]scalar.Scalar, len(dt.Fields())) + for i, f := range dt.Fields() { + values[i] = scalar.MakeNullScalar(f.Type) + } + return scalar.NewSparseUnionScalar(values, dt.TypeCodes()[idx], dt.(*arrow.SparseUnionType)) + case arrow.DenseMode: + code := dt.TypeCodes()[idx] + value := scalar.MakeNullScalar(dt.Fields()[idx].Type) + return scalar.NewDenseUnionScalar(value, code, dt.(*arrow.DenseUnionType)) + } + return nil +} + +type UnionScalarSuite struct { + suite.Suite + + mode arrow.UnionMode + dt arrow.DataType + unionType arrow.UnionType + alpha, beta, two, three scalar.Scalar + unionAlpha, unionBeta, unionTwo, unionThree scalar.Scalar + unionOtherTwo, unionStringNull, unionNumberNull scalar.Scalar +} + +func (s *UnionScalarSuite) scalarFromValue(idx int, val scalar.Scalar) scalar.Scalar { + switch s.mode { + case arrow.SparseMode: + return makeSparseUnionScalar(s.dt.(*arrow.SparseUnionType), val, idx) + case arrow.DenseMode: + return makeDenseUnionScalar(s.dt.(*arrow.DenseUnionType), val, idx) + } + return nil +} + +func (s *UnionScalarSuite) specificNull(idx int) scalar.Scalar { + return makeSpecificNullScalar(s.unionType, idx) +} + +func (s *UnionScalarSuite) SetupTest() { + s.dt = arrow.UnionOf(s.mode, []arrow.Field{ + {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, + {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, + }, []arrow.UnionTypeCode{3, 42, 43}) + + s.unionType = s.dt.(arrow.UnionType) + + s.alpha = scalar.MakeScalar("alpha") + s.beta = scalar.MakeScalar("beta") + s.two = scalar.MakeScalar(uint64(2)) + s.three = scalar.MakeScalar(uint64(3)) + + s.unionAlpha = s.scalarFromValue(0, s.alpha) + s.unionBeta = s.scalarFromValue(0, s.beta) + s.unionTwo = s.scalarFromValue(1, s.two) + s.unionOtherTwo = s.scalarFromValue(2, s.two) + s.unionThree = s.scalarFromValue(1, s.three) + s.unionStringNull = s.specificNull(0) + s.unionNumberNull = s.specificNull(1) +} + +func (s *UnionScalarSuite) TestValidate() { + s.NoError(s.unionAlpha.ValidateFull()) + s.NoError(s.unionAlpha.Validate()) + s.NoError(s.unionBeta.ValidateFull()) + s.NoError(s.unionBeta.Validate()) + s.NoError(s.unionTwo.ValidateFull()) + s.NoError(s.unionTwo.Validate()) + s.NoError(s.unionOtherTwo.ValidateFull()) + s.NoError(s.unionOtherTwo.Validate()) + s.NoError(s.unionThree.ValidateFull()) + s.NoError(s.unionThree.Validate()) + s.NoError(s.unionStringNull.ValidateFull()) + s.NoError(s.unionStringNull.Validate()) + s.NoError(s.unionNumberNull.ValidateFull()) + s.NoError(s.unionNumberNull.Validate()) +} + +func (s *UnionScalarSuite) setTypeCode(sc scalar.Scalar, c arrow.UnionTypeCode) { + switch sc := sc.(type) { + case 
*scalar.SparseUnion: + sc.TypeCode = c + case *scalar.DenseUnion: + sc.TypeCode = c + } +} + +func (s *UnionScalarSuite) setIsValid(sc scalar.Scalar, v bool) { + switch sc := sc.(type) { + case *scalar.SparseUnion: + sc.Valid = v + case *scalar.DenseUnion: + sc.Valid = v + } +} + +func (s *UnionScalarSuite) TestValidateErrors() { + // type code doesn't exist + sc := s.scalarFromValue(0, s.alpha) + + // invalid type code + s.setTypeCode(sc, 0) + s.Error(sc.Validate()) + s.Error(sc.ValidateFull()) + + s.setIsValid(sc, false) + s.Error(sc.Validate()) + s.Error(sc.ValidateFull()) + + s.setTypeCode(sc, -42) + s.setIsValid(sc, true) + s.Error(sc.Validate()) + s.Error(sc.ValidateFull()) + + s.setIsValid(sc, false) + s.Error(sc.Validate()) + s.Error(sc.ValidateFull()) + + // type code doesn't correspond to child type + if sc, ok := sc.(*scalar.DenseUnion); ok { + sc.TypeCode = 42 + sc.Valid = true + s.Error(sc.Validate()) + s.Error(sc.ValidateFull()) + + sc = s.scalarFromValue(2, s.two).(*scalar.DenseUnion) + sc.TypeCode = 3 + s.Error(sc.Validate()) + s.Error(sc.ValidateFull()) + } + + // underlying value has invalid utf8 + sc = s.scalarFromValue(0, scalar.NewStringScalar("\xff")) + s.NoError(sc.Validate()) + s.Error(sc.ValidateFull()) +} + +func (s *UnionScalarSuite) TestEquals() { + // differing values + s.False(scalar.Equals(s.unionAlpha, s.unionBeta)) + s.False(scalar.Equals(s.unionTwo, s.unionThree)) + // differing validities + s.False(scalar.Equals(s.unionAlpha, s.unionStringNull)) + // differing types + s.False(scalar.Equals(s.unionAlpha, s.unionTwo)) + s.False(scalar.Equals(s.unionAlpha, s.unionOtherTwo)) + // type codes don't count when comparing union scalars: the underlying + // values are identical even though their provenance is different + s.True(scalar.Equals(s.unionTwo, s.unionOtherTwo)) + s.True(scalar.Equals(s.unionStringNull, s.unionNumberNull)) +} + +func (s *UnionScalarSuite) TestMakeNullScalar() { + sc := scalar.MakeNullScalar(s.dt) + s.True(arrow.TypeEqual(s.dt, sc.DataType())) + s.False(sc.IsValid()) + + // the first child field is chosen arbitrarily for the purposes of + // making a null scalar + switch s.mode { + case arrow.DenseMode: + asDense := sc.(*scalar.DenseUnion) + s.EqualValues(3, asDense.TypeCode) + s.False(asDense.Value.IsValid()) + case arrow.SparseMode: + asSparse := sc.(*scalar.SparseUnion) + s.EqualValues(3, asSparse.TypeCode) + s.False(asSparse.Value[asSparse.ChildID].IsValid()) + } +} + +type SparseUnionSuite struct { + UnionScalarSuite +} + +func (s *SparseUnionSuite) SetupSuite() { + s.mode = arrow.SparseMode +} + +func (s *SparseUnionSuite) TestGetScalar() { + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(s.T(), 0) + + children := make([]arrow.Array, 3) + children[0], _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["alpha", "", "beta", null, "gamma"]`)) + defer children[0].Release() + children[1], _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Uint64, strings.NewReader(`[1, 2, 11, 22, null]`)) + defer children[1].Release() + children[2], _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Uint64, strings.NewReader(`[100, 101, 102, 103, 104]`)) + defer children[2].Release() + + typeIDs, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[3, 42, 3, 3, 42]`)) + defer typeIDs.Release() + + arr := array.NewSparseUnion(s.dt.(*arrow.SparseUnionType), 5, children, typeIDs.Data().Buffers()[1], 0) + defer arr.Release() + + checkGetValidUnionScalar(s.T(), arr, 0, s.unionAlpha, 
s.alpha) + checkGetValidUnionScalar(s.T(), arr, 1, s.unionTwo, s.two) + checkGetValidUnionScalar(s.T(), arr, 2, s.unionBeta, s.beta) + checkGetNullUnionScalar(s.T(), arr, 3) + checkGetNullUnionScalar(s.T(), arr, 4) +} + +type DenseUnionSuite struct { + UnionScalarSuite +} + +func (s *DenseUnionSuite) SetupSuite() { + s.mode = arrow.DenseMode +} + +func (s *DenseUnionSuite) TestGetScalar() { + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(s.T(), 0) + + children := make([]arrow.Array, 3) + children[0], _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["alpha", "beta", null]`)) + defer children[0].Release() + children[1], _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Uint64, strings.NewReader(`[2, 3]`)) + defer children[1].Release() + children[2], _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Uint64, strings.NewReader(`[]`)) + defer children[2].Release() + + typeIDs, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[3, 42, 3, 3, 42]`)) + defer typeIDs.Release() + offsets, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[0, 0, 1, 2, 1]`)) + defer offsets.Release() + + arr := array.NewDenseUnion(s.dt.(*arrow.DenseUnionType), 5, children, typeIDs.Data().Buffers()[1], offsets.Data().Buffers()[1], 0) + defer arr.Release() + + checkGetValidUnionScalar(s.T(), arr, 0, s.unionAlpha, s.alpha) + checkGetValidUnionScalar(s.T(), arr, 1, s.unionTwo, s.two) + checkGetValidUnionScalar(s.T(), arr, 2, s.unionBeta, s.beta) + checkGetNullUnionScalar(s.T(), arr, 3) + checkGetValidUnionScalar(s.T(), arr, 4, s.unionThree, s.three) +} + +func TestUnionScalars(t *testing.T) { + suite.Run(t, new(SparseUnionSuite)) + suite.Run(t, new(DenseUnionSuite)) +} diff --git a/go/arrow/scalar/temporal.go b/go/arrow/scalar/temporal.go index de63f0582f3c2..38a3dd98e4959 100644 --- a/go/arrow/scalar/temporal.go +++ b/go/arrow/scalar/temporal.go @@ -22,7 +22,7 @@ import ( "time" "unsafe" - "github.com/apache/arrow/go/v9/arrow" + "github.com/apache/arrow/go/v10/arrow" ) func temporalToString(s TemporalScalar) string { diff --git a/go/arrow/schema.go b/go/arrow/schema.go index 80a45bda0e61b..bd44c248b919b 100644 --- a/go/arrow/schema.go +++ b/go/arrow/schema.go @@ -20,6 +20,8 @@ import ( "fmt" "sort" "strings" + + "github.com/apache/arrow/go/v10/arrow/endian" ) type Metadata struct { @@ -136,18 +138,24 @@ func (md Metadata) Equal(rhs Metadata) bool { // Schema is a sequence of Field values, describing the columns of a table or // a record batch. type Schema struct { - fields []Field - index map[string][]int - meta Metadata + fields []Field + index map[string][]int + meta Metadata + endianness endian.Endianness } // NewSchema returns a new Schema value from the slice of fields and metadata. // // NewSchema panics if there is a field with an invalid DataType. 
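// Illustrative sketch, not part of the patch: how the endianness-aware schema
// API added to schema.go below (NewSchemaWithEndian, WithEndianness,
// Endianness, IsNativeEndian) might be used. All identifiers come from the
// diff itself; this is a standalone example, not code from the change set.
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/endian"
)

func main() {
	fields := []arrow.Field{{Name: "f1", Type: arrow.PrimitiveTypes.Int32, Nullable: true}}

	// NewSchema keeps the platform's native byte order, as before.
	native := arrow.NewSchema(fields, nil)
	fmt.Println(native.IsNativeEndian()) // true on any platform

	// An explicitly big-endian view of the same schema; Equal and Fingerprint
	// now take byte order into account, so on a little-endian machine this is
	// no longer equal to the native schema.
	be := native.WithEndianness(endian.BigEndian)
	fmt.Println(be.Endianness(), native.Equal(be))
}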
func NewSchema(fields []Field, metadata *Metadata) *Schema { + return NewSchemaWithEndian(fields, metadata, endian.NativeEndian) +} + +func NewSchemaWithEndian(fields []Field, metadata *Metadata, e endian.Endianness) *Schema { sc := &Schema{ - fields: make([]Field, 0, len(fields)), - index: make(map[string][]int, len(fields)), + fields: make([]Field, 0, len(fields)), + index: make(map[string][]int, len(fields)), + endianness: e, } if metadata != nil { sc.meta = metadata.clone() @@ -162,9 +170,15 @@ func NewSchema(fields []Field, metadata *Metadata) *Schema { return sc } -func (sc *Schema) Metadata() Metadata { return sc.meta } -func (sc *Schema) Fields() []Field { return sc.fields } -func (sc *Schema) Field(i int) Field { return sc.fields[i] } +func (sc *Schema) WithEndianness(e endian.Endianness) *Schema { + return NewSchemaWithEndian(sc.fields, &sc.meta, e) +} + +func (sc *Schema) Endianness() endian.Endianness { return sc.endianness } +func (sc *Schema) IsNativeEndian() bool { return sc.endianness == endian.NativeEndian } +func (sc *Schema) Metadata() Metadata { return sc.meta } +func (sc *Schema) Fields() []Field { return sc.fields } +func (sc *Schema) Field(i int) Field { return sc.fields[i] } func (sc *Schema) FieldsByName(n string) ([]Field, bool) { indices, ok := sc.index[n] @@ -196,6 +210,8 @@ func (sc *Schema) Equal(o *Schema) bool { return false case len(sc.fields) != len(o.fields): return false + case sc.endianness != o.endianness: + return false } for i := range sc.fields { @@ -215,6 +231,9 @@ func (s *Schema) String() string { } fmt.Fprintf(o, " - %v", f) } + if s.endianness != endian.NativeEndian { + fmt.Fprintf(o, "\n endianness: %v", s.endianness) + } if meta := s.Metadata(); meta.Len() > 0 { fmt.Fprintf(o, "\n metadata: %v", meta) } @@ -237,7 +256,11 @@ func (s *Schema) Fingerprint() string { b.WriteString(fieldFingerprint) b.WriteByte(';') } - // endianness + if s.endianness == endian.LittleEndian { + b.WriteByte('L') + } else { + b.WriteByte('B') + } b.WriteByte('}') return b.String() } diff --git a/go/arrow/schema_test.go b/go/arrow/schema_test.go index 0c7dc90745733..80d935e526378 100644 --- a/go/arrow/schema_test.go +++ b/go/arrow/schema_test.go @@ -20,6 +20,8 @@ import ( "fmt" "reflect" "testing" + + "github.com/apache/arrow/go/v10/arrow/endian" ) func TestMetadata(t *testing.T) { @@ -135,6 +137,7 @@ func TestSchema(t *testing.T) { md *Metadata err error serialize string + addEndian bool }{ { fields: []Field{ @@ -185,6 +188,27 @@ func TestSchema(t *testing.T) { - dup: type=int32 - dup: type=int64`, }, + { + fields: []Field{ + {Name: "f1", Type: PrimitiveTypes.Int32, Nullable: true}, + {Name: "f2", Type: PrimitiveTypes.Uint8}, + {Name: "f3", Type: BinaryTypes.String, Nullable: true}, + {Name: "f4", Type: ListOf(PrimitiveTypes.Int16), Nullable: true}, + }, + md: func() *Metadata { + md := MetadataFrom(map[string]string{"k1": "v1", "k2": "v2"}) + return &md + }(), + addEndian: true, // only print endianness if non-native endian + serialize: `schema: + fields: 4 + - f1: type=int32, nullable + - f2: type=uint8 + - f3: type=utf8, nullable + - f4: type=list, nullable + endianness: ` + endian.NonNativeEndian.String() + ` + metadata: ["k1": "v1", "k2": "v2"]`, + }, } { t.Run("", func(t *testing.T) { if tc.err != nil { @@ -209,6 +233,9 @@ func TestSchema(t *testing.T) { } s := NewSchema(tc.fields, tc.md) + if tc.addEndian { + s = s.WithEndianness(endian.NonNativeEndian) + } if got, want := len(s.Fields()), len(tc.fields); got != want { t.Fatalf("invalid number of fields. 
got=%d, want=%d", got, want) @@ -342,6 +369,26 @@ func TestSchemaEqual(t *testing.T) { }, md), want: false, }, + { + a: NewSchemaWithEndian(fields, nil, endian.LittleEndian), + b: NewSchemaWithEndian(fields, nil, endian.LittleEndian), + want: true, + }, + { + a: NewSchemaWithEndian(fields, nil, endian.LittleEndian), + b: NewSchemaWithEndian(fields, nil, endian.BigEndian), + want: false, + }, + { + a: NewSchemaWithEndian(fields, nil, endian.LittleEndian), + b: NewSchema(fields, nil), + want: !endian.IsBigEndian, + }, + { + a: NewSchemaWithEndian(fields, nil, endian.BigEndian), + b: NewSchema(fields, nil), + want: endian.IsBigEndian, + }, } { t.Run("", func(t *testing.T) { if !tc.a.Equal(tc.a) { diff --git a/go/arrow/table.go b/go/arrow/table.go index 68c1ca0ba47d3..c4a6351cce269 100644 --- a/go/arrow/table.go +++ b/go/arrow/table.go @@ -19,7 +19,7 @@ package arrow import ( "sync/atomic" - "github.com/apache/arrow/go/v9/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/internal/debug" ) // Table represents a logical sequence of chunked arrays of equal length. It is diff --git a/go/arrow/tensor/numeric.gen.go b/go/arrow/tensor/numeric.gen.go index ef398a45b1e67..0f9ff941b849c 100644 --- a/go/arrow/tensor/numeric.gen.go +++ b/go/arrow/tensor/numeric.gen.go @@ -19,7 +19,7 @@ package tensor import ( - "github.com/apache/arrow/go/v9/arrow" + "github.com/apache/arrow/go/v10/arrow" ) // Int8 is an n-dim array of int8s. diff --git a/go/arrow/tensor/numeric.gen.go.tmpl b/go/arrow/tensor/numeric.gen.go.tmpl index 56974ddf1e3e5..43393c77edb57 100644 --- a/go/arrow/tensor/numeric.gen.go.tmpl +++ b/go/arrow/tensor/numeric.gen.go.tmpl @@ -17,8 +17,8 @@ package tensor import ( - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" ) {{range .In}} diff --git a/go/arrow/tensor/numeric.gen_test.go b/go/arrow/tensor/numeric.gen_test.go index 974015125adf6..0921e127b9ca3 100644 --- a/go/arrow/tensor/numeric.gen_test.go +++ b/go/arrow/tensor/numeric.gen_test.go @@ -23,10 +23,10 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/arrow/tensor" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/tensor" ) func TestTensorInt8(t *testing.T) { diff --git a/go/arrow/tensor/numeric.gen_test.go.tmpl b/go/arrow/tensor/numeric.gen_test.go.tmpl index c31281c5c3ea5..30907813fb9a7 100644 --- a/go/arrow/tensor/numeric.gen_test.go.tmpl +++ b/go/arrow/tensor/numeric.gen_test.go.tmpl @@ -21,10 +21,10 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/arrow/tensor" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/tensor" ) {{range .In}} diff --git a/go/arrow/tensor/tensor.go b/go/arrow/tensor/tensor.go index 0b4c332bf4cc7..743c81f3fbdae 100644 --- a/go/arrow/tensor/tensor.go +++ b/go/arrow/tensor/tensor.go @@ -21,8 +21,8 @@ import ( "fmt" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/internal/debug" + 
"github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/internal/debug" ) // Interface represents an n-dimensional array of numerical data. diff --git a/go/arrow/tensor/tensor_test.go b/go/arrow/tensor/tensor_test.go index bbd68a0ece70c..a9dea8428b4e7 100644 --- a/go/arrow/tensor/tensor_test.go +++ b/go/arrow/tensor/tensor_test.go @@ -21,10 +21,10 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/arrow/tensor" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/tensor" ) func TestTensor(t *testing.T) { diff --git a/go/arrow/type_traits_boolean.go b/go/arrow/type_traits_boolean.go index b3eb3bfa5303f..343ec5ff8ccaa 100644 --- a/go/arrow/type_traits_boolean.go +++ b/go/arrow/type_traits_boolean.go @@ -17,7 +17,7 @@ package arrow import ( - "github.com/apache/arrow/go/v9/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/bitutil" ) type booleanTraits struct{} diff --git a/go/arrow/type_traits_decimal128.go b/go/arrow/type_traits_decimal128.go index 1be49deac3961..23e75381832b8 100644 --- a/go/arrow/type_traits_decimal128.go +++ b/go/arrow/type_traits_decimal128.go @@ -20,8 +20,8 @@ import ( "reflect" "unsafe" - "github.com/apache/arrow/go/v9/arrow/decimal128" - "github.com/apache/arrow/go/v9/arrow/endian" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/endian" ) // Decimal128 traits diff --git a/go/arrow/type_traits_decimal256.go b/go/arrow/type_traits_decimal256.go new file mode 100644 index 0000000000000..15e69e96799d8 --- /dev/null +++ b/go/arrow/type_traits_decimal256.go @@ -0,0 +1,70 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package arrow + +import ( + "reflect" + "unsafe" + + "github.com/apache/arrow/go/v10/arrow/decimal256" + "github.com/apache/arrow/go/v10/arrow/endian" +) + +// Decimal256 traits +var Decimal256Traits decimal256Traits + +const ( + Decimal256SizeBytes = int(unsafe.Sizeof(decimal256.Num{})) +) + +type decimal256Traits struct{} + +func (decimal256Traits) BytesRequired(n int) int { return Decimal256SizeBytes * n } + +func (decimal256Traits) PutValue(b []byte, v decimal256.Num) { + for i, a := range v.Array() { + start := i * 8 + endian.Native.PutUint64(b[start:], a) + } +} + +// CastFromBytes reinterprets the slice b to a slice of decimal256 +func (decimal256Traits) CastFromBytes(b []byte) []decimal256.Num { + h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + + var res []decimal256.Num + s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) + s.Data = h.Data + s.Len = h.Len / Decimal256SizeBytes + s.Cap = h.Cap / Decimal256SizeBytes + + return res +} + +// CastToBytes reinterprets the slice b to a slice of bytes +func (decimal256Traits) CastToBytes(b []decimal256.Num) []byte { + h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + + var res []byte + s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) + s.Data = h.Data + s.Len = h.Len * Decimal256SizeBytes + s.Cap = h.Cap * Decimal256SizeBytes + + return res +} + +func (decimal256Traits) Copy(dst, src []decimal256.Num) { copy(dst, src) } diff --git a/go/arrow/type_traits_float16.go b/go/arrow/type_traits_float16.go index b314e5b344a5c..4408135095ec1 100644 --- a/go/arrow/type_traits_float16.go +++ b/go/arrow/type_traits_float16.go @@ -20,8 +20,8 @@ import ( "reflect" "unsafe" - "github.com/apache/arrow/go/v9/arrow/float16" - "github.com/apache/arrow/go/v9/arrow/endian" + "github.com/apache/arrow/go/v10/arrow/float16" + "github.com/apache/arrow/go/v10/arrow/endian" ) // Float16 traits diff --git a/go/arrow/type_traits_interval.go b/go/arrow/type_traits_interval.go index 28504158dce52..1253c501aa29e 100644 --- a/go/arrow/type_traits_interval.go +++ b/go/arrow/type_traits_interval.go @@ -20,8 +20,8 @@ import ( "reflect" "unsafe" - "github.com/apache/arrow/go/v9/arrow/endian" - "github.com/apache/arrow/go/v9/arrow/internal/debug" + "github.com/apache/arrow/go/v10/arrow/endian" + "github.com/apache/arrow/go/v10/arrow/internal/debug" ) var ( diff --git a/go/arrow/type_traits_numeric.gen.go b/go/arrow/type_traits_numeric.gen.go index 6c9b0081dcd90..a96f537306c50 100644 --- a/go/arrow/type_traits_numeric.gen.go +++ b/go/arrow/type_traits_numeric.gen.go @@ -23,7 +23,7 @@ import ( "reflect" "unsafe" - "github.com/apache/arrow/go/v9/arrow/endian" + "github.com/apache/arrow/go/v10/arrow/endian" ) var ( diff --git a/go/arrow/type_traits_numeric.gen.go.tmpl b/go/arrow/type_traits_numeric.gen.go.tmpl index 5c1b3c175a409..290d7f33b0a1a 100644 --- a/go/arrow/type_traits_numeric.gen.go.tmpl +++ b/go/arrow/type_traits_numeric.gen.go.tmpl @@ -21,7 +21,7 @@ import ( "reflect" "unsafe" - "github.com/apache/arrow/go/v9/arrow/endian" + "github.com/apache/arrow/go/v10/arrow/endian" ) var ( diff --git a/go/arrow/type_traits_numeric.gen_test.go b/go/arrow/type_traits_numeric.gen_test.go index c6620d53d14d2..8b561b2dd7df0 100644 --- a/go/arrow/type_traits_numeric.gen_test.go +++ b/go/arrow/type_traits_numeric.gen_test.go @@ -22,7 +22,7 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v9/arrow" + "github.com/apache/arrow/go/v10/arrow" ) func TestInt64Traits(t *testing.T) { diff --git a/go/arrow/type_traits_numeric.gen_test.go.tmpl b/go/arrow/type_traits_numeric.gen_test.go.tmpl index 990bcc77079e1..aa8c0388ed7c7 100644 ---
a/go/arrow/type_traits_numeric.gen_test.go.tmpl +++ b/go/arrow/type_traits_numeric.gen_test.go.tmpl @@ -20,7 +20,7 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v9/arrow" + "github.com/apache/arrow/go/v10/arrow" ) {{- range .In}} diff --git a/go/arrow/type_traits_test.go b/go/arrow/type_traits_test.go index fe270fda9fc86..7fc5fe658f7d8 100644 --- a/go/arrow/type_traits_test.go +++ b/go/arrow/type_traits_test.go @@ -21,9 +21,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/decimal128" - "github.com/apache/arrow/go/v9/arrow/float16" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/float16" ) func TestBooleanTraits(t *testing.T) { diff --git a/go/arrow/unionmode_string.go b/go/arrow/unionmode_string.go new file mode 100644 index 0000000000000..394d4f6644277 --- /dev/null +++ b/go/arrow/unionmode_string.go @@ -0,0 +1,25 @@ +// Code generated by "stringer -type=UnionMode -linecomment"; DO NOT EDIT. + +package arrow + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[SparseMode-2] + _ = x[DenseMode-3] +} + +const _UnionMode_name = "SPARSEDENSE" + +var _UnionMode_index = [...]uint8{0, 6, 11} + +func (i UnionMode) String() string { + i -= 2 + if i < 0 || i >= UnionMode(len(_UnionMode_index)-1) { + return "UnionMode(" + strconv.FormatInt(int64(i+2), 10) + ")" + } + return _UnionMode_name[_UnionMode_index[i]:_UnionMode_index[i+1]] +} diff --git a/go/go.mod b/go/go.mod index 65e54cd67915c..c27de3029f9bd 100644 --- a/go/go.mod +++ b/go/go.mod @@ -14,34 +14,41 @@ // See the License for the specific language governing permissions and // limitations under the License. 
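// Illustrative sketch, not part of the patch: the module path below moves from
// github.com/apache/arrow/go/v9 to github.com/apache/arrow/go/v10, so under Go's
// semantic import versioning downstream code must require the /v10 module in its
// go.mod and update import paths accordingly. Only standard builder APIs are shown.
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

func main() {
	bldr := array.NewInt32Builder(memory.DefaultAllocator)
	defer bldr.Release()

	bldr.AppendValues([]int32{1, 2, 3}, nil)
	arr := bldr.NewInt32Array()
	defer arr.Release()

	fmt.Println(arr.Int32Values()) // [1 2 3]
}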
-module github.com/apache/arrow/go/v9 +module github.com/apache/arrow/go/v10 -go 1.15 +go 1.16 require ( github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c github.com/andybalholm/brotli v1.0.4 - github.com/apache/thrift v0.15.0 + github.com/apache/thrift v0.16.0 github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815 - github.com/goccy/go-json v0.9.6 + github.com/goccy/go-json v0.9.10 github.com/golang/snappy v0.0.4 - github.com/google/flatbuffers v2.0.5+incompatible - github.com/google/go-cmp v0.5.7 // indirect - github.com/klauspost/asmfmt v1.3.1 - github.com/klauspost/compress v1.14.2 + github.com/google/flatbuffers v2.0.6+incompatible + github.com/klauspost/asmfmt v1.3.2 + github.com/klauspost/compress v1.15.9 + github.com/kr/pretty v0.1.0 // indirect + github.com/mattn/go-isatty v0.0.14 // indirect github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 - github.com/pierrec/lz4/v4 v4.1.12 - github.com/stretchr/testify v1.7.2 - github.com/zeebo/xxh3 v1.0.1 - golang.org/x/exp v0.0.0-20211216164055-b2b84827b756 - golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd // indirect - golang.org/x/sync v0.0.0-20220513210516-0976fa681c29 - golang.org/x/sys v0.0.0-20220412211240-33da011f77ad - golang.org/x/tools v0.1.10 - golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f - gonum.org/v1/gonum v0.9.3 - google.golang.org/genproto v0.0.0-20220126215142-9970aeb2e350 // indirect - google.golang.org/grpc v1.44.0 - google.golang.org/protobuf v1.27.1 + github.com/pierrec/lz4/v4 v4.1.15 + github.com/stretchr/testify v1.8.0 + github.com/zeebo/xxh3 v1.0.2 + golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e + golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 + golang.org/x/sys v0.0.0-20220808155132-1c4a2a72c664 + golang.org/x/tools v0.1.12 + golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f + gonum.org/v1/gonum v0.11.0 + google.golang.org/grpc v1.48.0 + google.golang.org/protobuf v1.28.1 + gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect + lukechampine.com/uint128 v1.2.0 // indirect + modernc.org/cc/v3 v3.36.1 // indirect + modernc.org/ccgo/v3 v3.16.8 // indirect + modernc.org/libc v1.16.19 // indirect + modernc.org/opt v0.1.3 // indirect + modernc.org/sqlite v1.18.0 + modernc.org/strutil v1.1.2 // indirect ) diff --git a/go/go.sum b/go/go.sum index d883c7c1222bc..58e2996e4351f 100644 --- a/go/go.sum +++ b/go/go.sum @@ -1,120 +1,65 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= -dmitri.shuralyov.com/gpu/mtl v0.0.0-20201218220906-28db891af037/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8= +git.sr.ht/~sbinet/gg v0.3.1/go.mod h1:KGYtlADtqsqANL9ueOFkWymvzUvLMQllU5Ixo+8v3pc= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU= github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk= -github.com/Knetic/govaluate 
v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= -github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= -github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= -github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= -github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= -github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c= +github.com/ajstarks/deck v0.0.0-20200831202436-30c9fc6549a9/go.mod h1:JynElWSGnm/4RlzPXRlREEwqTHAN3T56Bv2ITsFT3gY= +github.com/ajstarks/deck/generate v0.0.0-20210309230005-c3f852c02e19/go.mod h1:T13YZdzov6OU0A1+RfKZiZN9ca6VeKdBdyDV+BY97Tk= github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= -github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= -github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= -github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= -github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/ajstarks/svgo v0.0.0-20211024235047-1546f124cd8b/go.mod h1:1KcenG0jGWcpt8ov532z81sp/kMMUG485J2InIOyADM= github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= -github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= -github.com/apache/thrift v0.13.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= -github.com/apache/thrift v0.15.0 h1:aGvdaR0v1t9XLgjtBYwxcBvBOTMqClzwE26CHOgjW1Y= -github.com/apache/thrift v0.15.0/go.mod h1:PHK3hniurgQaNMZYaCLEqXKsYK8upmhPbmdP2FXSqgU= -github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= -github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= -github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= -github.com/aryann/difflib v0.0.0-20170710044230-e206f873d14a/go.mod h1:DAHtR1m6lCRdSC2Tm3DSWRPvIPr6xNKyeHdqDQSQT+A= -github.com/aws/aws-lambda-go v1.13.3/go.mod h1:4UKl9IzQMoD+QF79YdCuzCwp8VbmG4VAQwij/eHl5CU= -github.com/aws/aws-sdk-go v1.27.0/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= -github.com/aws/aws-sdk-go-v2 v0.18.0/go.mod h1:JWVYvqSMppoMJC0x5wdwiImzgXTI9FuZwxzkQq9wy+g= -github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= -github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= -github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= +github.com/apache/thrift v0.16.0 h1:qEy6UW60iVOlUy+b9ZR0d5WzUWYGOo4HfopoyBaNmoY= +github.com/apache/thrift v0.16.0/go.mod h1:PHK3hniurgQaNMZYaCLEqXKsYK8upmhPbmdP2FXSqgU= github.com/boombuler/barcode v1.0.0/go.mod 
h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= -github.com/casbin/casbin/v2 v2.1.2/go.mod h1:YcPU1XXisHhLzuxH9coDNf2FbKpjGlbCg3n9yuLkIJQ= -github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= +github.com/boombuler/barcode v1.0.1/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/clbanning/x2j v0.0.0-20191024224557-825249438eec/go.mod h1:jMjuTZXRI4dUb/I5gc9Hdhagfvm9+RyrPryS/auMzxE= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= -github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= -github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI= -github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= -github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815 h1:bWDMxwH3px2JBh6AyO7hdCn/PkvCZXii8TGj7sbtEbQ= github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= -github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= -github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= -github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod 
h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU= -github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= -github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= -github.com/envoyproxy/go-control-plane v0.6.9/go.mod h1:SBwIajubJHhxtWwsL9s8ss4safvEdbitLhGGK48rN6g= +github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= +github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= -github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ= -github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0= +github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= -github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db/go.mod h1:7dvUGVsVBjqR7JHJk0brhHOZYGmfBYOrK0ZhYMEtBr4= -github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20= -github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/go-fonts/dejavu v0.1.0/go.mod h1:4Wt4I4OU2Nq9asgDCteaAaWZOV24E+0/Pwo0gppep4g= github.com/go-fonts/latin-modern v0.2.0/go.mod h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3T0ecnM9pNujks= github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY= +github.com/go-fonts/liberation v0.2.0/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY= github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= -github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= -github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-kit/kit v0.10.0/go.mod h1:xUsJbQ/Fp4kEt7AFgCuvyX4a71u8h9jB8tj/ORgOZ7o= github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U= -github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= -github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= -github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= -github.com/go-logr/logr v0.4.0/go.mod 
h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= -github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= -github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= -github.com/goccy/go-json v0.9.6 h1:5/4CtRQdtsX0sal8fdVhTaiMN01Ri8BExZZ8iRmHQ6E= -github.com/goccy/go-json v0.9.6/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= -github.com/gogo/googleapis v1.1.0/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= -github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= +github.com/go-latex/latex v0.0.0-20210823091927-c0d11ff05a81/go.mod h1:SX0U8uGpxhq9o2S/CELCSUxEWWAuoCUcVCQWv7G2OCk= +github.com/go-pdf/fpdf v0.5.0/go.mod h1:HzcnA+A23uwogo0tp9yU+l3V+KXhiESpt1PMayhOh5M= +github.com/go-pdf/fpdf v0.6.0/go.mod h1:HzcnA+A23uwogo0tp9yU+l3V+KXhiESpt1PMayhOh5M= +github.com/goccy/go-json v0.9.10 h1:hCeNmprSNLB8B8vQKWl6DpuH0t60oEs+TAk9a7CScKc= +github.com/goccy/go-json v0.9.10/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= @@ -128,234 +73,84 @@ github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= -github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= -github.com/google/flatbuffers v2.0.5+incompatible h1:ANsW0idDAXIY+mNHzIHxWRfabV2x5LUEEIIWcwsYgB8= -github.com/google/flatbuffers v2.0.5+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/flatbuffers v2.0.6+incompatible h1:XHFReMv7nFFusa+CEokzWbzaYocKXI6C7hdU5Kgh9Lw= +github.com/google/flatbuffers v2.0.6+incompatible/go.mod 
h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.7 h1:81/ik6ipDQS2aGcBfIN5dHDB36BwrStyeAQquSYCV4o= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= -github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= -github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= +github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= -github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= -github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= -github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= -github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= -github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= -github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= -github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= -github.com/hashicorp/consul/api v1.3.0/go.mod h1:MmDNSzIMUjNpY/mQ398R4bk2FnqQLoPndWW5VkKPlCE= -github.com/hashicorp/consul/sdk v0.3.0/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= -github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= -github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= -github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= -github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= -github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= -github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= -github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= -github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= -github.com/hashicorp/go-uuid v1.0.0/go.mod 
h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= -github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= -github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= -github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90= -github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= -github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= -github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= -github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= -github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= -github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= -github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= -github.com/hudl/fargo v1.3.0/go.mod h1:y3CKSmjA+wD2gak7sUSXTAoopbhU08POFhmITJgmKTg= -github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= -github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo= -github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= -github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= -github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= -github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= -github.com/json-iterator/go v1.1.8/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= -github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= -github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= -github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= +github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= +github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/asmfmt v1.3.1 h1:7xZi1N7s9gTLbqiM8KUv8TLyysavbTRGBT5/ly0bRtw= -github.com/klauspost/asmfmt v1.3.1/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= -github.com/klauspost/compress v1.14.2 h1:S0OHlFk/Gbon/yauFJ4FfJJF5V0fc5HbBTJazi28pRw= -github.com/klauspost/compress v1.14.2/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= +github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= +github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= +github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY= +github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/konsorten/go-windows-terminal-sequences 
v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM= -github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4= -github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ= -github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= -github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= -github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= -github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= -github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= -github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= +github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= +github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= +github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/mattn/go-sqlite3 v1.14.12 h1:TJ1bhYJPV44phC+IMu1u2K/i5RriLTPe+yc68XDJ1Z0= +github.com/mattn/go-sqlite3 v1.14.12/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= -github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= -github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= -github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= -github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= -github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= -github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= -github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= 
-github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= -github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg= -github.com/nats-io/jwt v0.3.2/go.mod h1:/euKqTS1ZD+zzjYrY7pseZrTtWQSjujC7xjPc8wL6eU= -github.com/nats-io/nats-server/v2 v2.1.2/go.mod h1:Afk+wRZqkMQs/p45uXdrVLuab3gwv3Z8C4HTBu8GD/k= -github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzEE/Zbp4w= -github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= -github.com/nats-io/nkeys v0.1.3/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= -github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= -github.com/oklog/oklog v0.3.2/go.mod h1:FCV+B7mhrz4o+ueLpx+KqkyXRGMWOYEvfiXtdGtbWGs= -github.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQA= -github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= -github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= -github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk= -github.com/opentracing-contrib/go-observer v0.0.0-20170622124052-a52f23424492/go.mod h1:Ngi6UdF0k5OKD5t5wlmGhe/EDKPoUM3BXZSSfIuJbis= -github.com/opentracing/basictracer-go v1.0.0/go.mod h1:QfBfYuafItcjQuMwinw9GhYKwFXS9KnPs5lxoYwgW74= -github.com/opentracing/opentracing-go v1.0.2/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= -github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= -github.com/openzipkin-contrib/zipkin-go-opentracing v0.4.5/go.mod h1:/wsWhb9smxSfWAKL3wpBW7V8scJMt8N8gnaMCS9E/cA= -github.com/openzipkin/zipkin-go v0.1.6/go.mod h1:QgAqvLzwWbR/WpD4A3cGpPtJrZXNIiJc5AZX7/PBEpw= -github.com/openzipkin/zipkin-go v0.2.1/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnhQw8ySjnjRyN4= -github.com/openzipkin/zipkin-go v0.2.2/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnhQw8ySjnjRyN4= -github.com/pact-foundation/pact-go v1.0.4/go.mod h1:uExwJY4kCzNPcHRj+hCR/HBbOOIwwtUjcrb0b5/5kLM= -github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= -github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= -github.com/performancecopilot/speed v3.0.0+incompatible/go.mod h1:/CLtqpZ5gBg1M9iaPbIdPPGyKcA8hKdoy6hAWba7Yac= github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY= github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= -github.com/pierrec/lz4 v1.0.2-0.20190131084431-473cd7ce01a1/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= -github.com/pierrec/lz4 v2.0.5+incompatible h1:2xWsjqPFWcplujydGg4WmhC/6fZqK42wMM8aXeqhl0I= -github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= -github.com/pierrec/lz4/v4 v4.1.12 h1:44l88ehTZAUGW4VlO1QC4zkilL99M6Y9MXNwEs0uzP8= -github.com/pierrec/lz4/v4 v4.1.12/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= -github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/phpdave11/gofpdi v1.0.13/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= +github.com/pierrec/lz4/v4 v4.1.15 
h1:MO0/ucJhngq7299dKLwIMtgTfbkoSPF6AoMYDd8Q4q0= +github.com/pierrec/lz4/v4 v4.1.15/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/profile v1.2.1/go.mod h1:hJw3o1OdXxsrSjjVksARp5W95eeEaEfptyVZyv6JUPA= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= -github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= -github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829/go.mod h1:p2iRAGwDERtqlqzRXnrOVns+ignqQo//hLXqYxZYVNs= -github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= -github.com/prometheus/client_golang v1.3.0/go.mod h1:hJaj2vgQTGQmVCsAACORcieXFeDPbaTKGT+JTgUa3og= -github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.1.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/common v0.2.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.7.0/go.mod h1:DjGbpBbp5NYNiECxcL/VnbXCCaQpKd3tt26CguLLsqA= -github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.0-20190117184657-bf6a532e95b1/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= -github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= -github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 h1:OdAsTTz6OkFY5QxjkYwrChwuRruF69c169dPK26NUlk= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= -github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ= -github.com/rs/zerolog v1.21.0/go.mod h1:ZPhntP/xmq1nnND05hhpAh2QMhSsA4UN3MGZ6O2J3hM= -github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w= -github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod 
h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= -github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E= -github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= -github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= -github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= -github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= -github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= -github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= -github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= -github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= -github.com/sony/gobreaker v0.4.1/go.mod h1:ZKptC7FHNvhBz7dN2LGjPVBz2sZJmc0/PkyDJOjmxWY= -github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= -github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= -github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= -github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= -github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= -github.com/streadway/handy v0.0.0-20190108123426-d5acb3125c2a/go.mod h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI= +github.com/ruudk/golang-pdf417 v0.0.0-20201230142125-a7e3863a1245/go.mod h1:pQAZKsJ8yyVxGRWYNEm9oFB8ieLgKFnamEyDmSA0BRk= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.1.1 h1:2vfRuCMp5sSVIDSqO8oNnWJq7mPa6KVP3iPIwFBuy8A= -github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0 h1:M2gUjqZET1qApGOWNSnZ49BAIMX4F/1plDv3+l31EJ4= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.2 h1:4jaiDzPyXQvSd7D0EjG45355tLlV3VOECpq10pLC+8s= -github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= -github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= -github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= -github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= -github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= +github.com/stretchr/testify v1.8.0/go.mod 
h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -github.com/zeebo/xxh3 v1.0.1 h1:FMSRIbkrLikb/0hZxmltpg84VkqDAT5M8ufXynuhXsI= -github.com/zeebo/xxh3 v1.0.1/go.mod h1:8VHV24/3AZLn3b6Mlp/KuC33LWH687Wq6EnziEB+rsA= -go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= -go.etcd.io/etcd v0.0.0-20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg= -go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= -go.opencensus.io v0.20.2/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= -go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.opentelemetry.io/otel v0.20.0/go.mod h1:Y3ugLH2oa81t5QO+Lty+zXf8zC9L26ax4Nzoxm/dooo= -go.opentelemetry.io/otel/metric v0.20.0/go.mod h1:598I5tYlH1vzBjn+BTuhzTCSb/9debfNp6R3s7Pr1eU= -go.opentelemetry.io/otel/oteltest v0.20.0/go.mod h1:L7bgKf9ZB7qCwT9Up7i9/pn0PWIa9FqQ2IQ8LoxiGnw= -go.opentelemetry.io/otel/sdk v0.20.0/go.mod h1:g/IcepuwNsoiX5Byy2nNV0ySUF1em498m7hBWC279Yc= -go.opentelemetry.io/otel/trace v0.20.0/go.mod h1:6GjCW8zgDjwGHGa6GkyeB8+/5vjT16gUEi0Nf1iBdgw= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= +github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= +github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= +github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= -go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= -go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= -go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= -go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= -go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= -go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= -go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU= -go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= -go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= -go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= -go.uber.org/zap v1.13.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= -go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ= -golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= -golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod 
h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= @@ -364,10 +159,9 @@ golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20190731235908-ec7cb31e5a56/go.mod h1:JhuoJpWY28nO4Vef9tZUw9qufEGTyX1+7lmHxV5q5G4= golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3/go.mod h1:NOZ3BPKG0ec/BKJQgnvsSFpcKLM5xXVWnvZS97DWHgE= -golang.org/x/exp v0.0.0-20211216164055-b2b84827b756 h1:/5Bs7sWi0i3rOVO5KnM55OwugpsD4bRW1zywKoZjbkI= -golang.org/x/exp v0.0.0-20211216164055-b2b84827b756/go.mod h1:b9TAUYHmRtqA6klRHApnXMnj+OyLce4yF5cZCUbk2ps= +golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e h1:+WEEuIdZHnUeJJmEUjyYC2gfUMj69yZXw17EnHg/otA= +golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e/go.mod h1:Kr81I6Kryrl9sr8s2FK3vxD90NdsKWRuOIl2O4CvYbA= golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= @@ -377,166 +171,115 @@ golang.org/x/image v0.0.0-20200430140353-33d19683fad8/go.mod h1:FeLwcggjj3mMvU+o golang.org/x/image v0.0.0-20200618115811-c13761719519/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/image v0.0.0-20201208152932-35266b937fa6/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/image v0.0.0-20210216034530-4410531fe030/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20210607152325-775e3b0c77b9/go.mod h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM= +golang.org/x/image v0.0.0-20210628002857-a66eb6448b8d/go.mod h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM= +golang.org/x/image v0.0.0-20211028202545-6944b10bf410/go.mod h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM= +golang.org/x/image v0.0.0-20220302094943-723b81ca9867/go.mod h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= -golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= -golang.org/x/mobile v0.0.0-20201217150744-e6ae53a27f4f/go.mod h1:skQtrUTUwhdJvXM/2KKJzY8pDgNr9I/FOMqDVRPBUS4= -golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= -golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod 
h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/mod v0.1.1-0.20191209134235-331c550502dd/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.5.1/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= -golang.org/x/mod v0.6.0-dev.0.20211013180041-c96bc1413d57/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= -golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 h1:kQgndtyPBW/JIYERgdxfwMYh3AVStj88WQTlNDi2a+o= golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 h1:6zppjxzCulZykYSLyVDYbneBfbaBIQPYMevg0bEwv2s= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190125091013-d26f9f9a57f3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd h1:O7DYs+zxREGLKzKoMQrtrEacpb0ZVXA5rIwylE2Xchk= -golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= 
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20220513210516-0976fa681c29 h1:w8s32wxx3sY+OjLlv9qltkLU5yvJzxjjgiHWLjdIcw4= -golang.org/x/sync v0.0.0-20220513210516-0976fa681c29/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 h1:uVc8UZUe6tr40fFVnUP5Oj+veunVezqYl9z7DYw9xzw= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191220142924-d4481acd189f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210304124612-50617c2ba197/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220412211240-33da011f77ad h1:ntjMns5wyP/fN65tdBD4g8J5w8n015+iIIs9rtjXkY0= -golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220808155132-1c4a2a72c664 h1:v1W7bwXHsnLLloWYTVEdvGvA7BHMeBYsPcF0GLDxIRs= +golang.org/x/sys v0.0.0-20220808155132-1c4a2a72c664/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools 
v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190927191325-030b2cf1153e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200103221440-774c71fcf114/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200117012304-6edc0a871e69/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20201124115921-2c860bdd6e78/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= -golang.org/x/tools v0.1.8-0.20211029000441-d6a9af8af023/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= -golang.org/x/tools v0.1.10 h1:QjFRCZxdOhBJ/UNgnBZLbNV13DlbnK0quyivTnXJM20= +golang.org/x/tools v0.1.9/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= golang.org/x/tools v0.1.10/go.mod h1:Uh6Zz+xoGYZom868N8YTex3t7RhtHDBrE8Gzo9bV56E= +golang.org/x/tools v0.1.12 h1:VveCTK38A2rkS8ZqFY25HIDFscX5X9OoEhJd3quQmXU= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f h1:GGU+dLjvlC3qDwqYgL6UgRmHXhOOgns0bZu2Ty5mm6U= -golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f h1:uF6paiQQebLeSXkrTqHqz0MXhXXS1KgF41eUdBNvxK0= +golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0= -gonum.org/v1/gonum v0.9.3 h1:DnoIG+QAMaF5NvxnGe/oKsgKcAc6PcUyl8q0VetfQ8s= gonum.org/v1/gonum v0.9.3/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0= 
-gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0 h1:OE9mWmgKkjJyEmDAAtGMPjXu+YNeGvK9VTSHY6+Qihc= +gonum.org/v1/gonum v0.11.0 h1:f1IJhK4Km5tBJmaiJXtk/PkL4cdVX6J+tGiM187uT5E= +gonum.org/v1/gonum v0.11.0/go.mod h1:fSG4YDCxxUZQJ7rKsQrj0gMOg00Il0Z96/qMA4bVQhA= gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY= -google.golang.org/api v0.3.1/go.mod h1:6wY9I6uQWHQ8EM57III9mq/AjF+i8G65rmVagqKMtkk= +gonum.org/v1/plot v0.10.1/go.mod h1:VZW5OlhkL1mysU9vaqNHnsy86inf6Ot+jB3r+BczCEo= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190530194941-fb225487d101/go.mod h1:z3L6/3dTEVtUr6QSP8miRzeRqwQOioJ9I66odjN4I7s= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 h1:+kGHl1aib/qcwaRi1CbqBZ1rk19r85MNUf8HaBghugY= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto v0.0.0-20220126215142-9970aeb2e350 h1:YxHp5zqIcAShDEvRr5/0rVESVS+njYF68PSdazrNLJo= -google.golang.org/genproto v0.0.0-20220126215142-9970aeb2e350/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.20.0/go.mod h1:chYK+tFQF0nDUGJgXMSgLCQk3phJEuONr2DCgLDdAQM= -google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= -google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= -google.golang.org/grpc v1.22.1/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.23.1/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= -google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= -google.golang.org/grpc v1.44.0 h1:weqSxi/TMs1SqFRMHCtBgXRs8k3X39QIDEZ0pRcttUg= -google.golang.org/grpc v1.44.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= 
+google.golang.org/grpc v1.48.0 h1:rQOsyJ/8+ufEDJd/Gdsz7HG220Mh9HAhFHRGnIjda0w= +google.golang.org/grpc v1.48.0/go.mod h1:vN9eftEi1UMyUsIF80+uQXhHjbXYbm0uXoFCACuMGWk= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -548,31 +291,60 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.27.1 h1:SnqbnDw1V7RiZcXPx5MEeqPv2s79L9i7BJUlG/+RurQ= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= +google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w= +google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= -gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= -gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= -gopkg.in/gcfg.v1 v1.2.3/go.mod h1:yesOnuUOFQAhST5vPY4nbZsb/huCgGGXlipJsBn0b3o= -gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= -gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= -gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI= -gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= -gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.1.3/go.mod h1:NgwopIslSNH47DimFoV78dnkksY2EFtX0ajyb3K/las= +lukechampine.com/uint128 v1.1.1/go.mod h1:c4eWIwlEGaxC/+H1VguhU4PHXNWDCDMUlWdIWl2j1gk= +lukechampine.com/uint128 v1.2.0 h1:mBi/5l91vocEN8otkC5bDLhi2KdCticRiwbdB0O+rjI= 
+lukechampine.com/uint128 v1.2.0/go.mod h1:c4eWIwlEGaxC/+H1VguhU4PHXNWDCDMUlWdIWl2j1gk= +modernc.org/cc/v3 v3.36.0/go.mod h1:NFUHyPn4ekoC/JHeZFfZurN6ixxawE1BnVonP/oahEI= +modernc.org/cc/v3 v3.36.1 h1:CICrjwr/1M4+6OQ4HJZ/AHxjcwe67r5vPUF518MkO8A= +modernc.org/cc/v3 v3.36.1/go.mod h1:NFUHyPn4ekoC/JHeZFfZurN6ixxawE1BnVonP/oahEI= +modernc.org/ccgo/v3 v3.0.0-20220428102840-41399a37e894/go.mod h1:eI31LL8EwEBKPpNpA4bU1/i+sKOwOrQy8D87zWUcRZc= +modernc.org/ccgo/v3 v3.0.0-20220430103911-bc99d88307be/go.mod h1:bwdAnOoaIt8Ax9YdWGjxWsdkPcZyRPHqrOvJxaKAKGw= +modernc.org/ccgo/v3 v3.16.4/go.mod h1:tGtX0gE9Jn7hdZFeU88slbTh1UtCYKusWOoCJuvkWsQ= +modernc.org/ccgo/v3 v3.16.6/go.mod h1:tGtX0gE9Jn7hdZFeU88slbTh1UtCYKusWOoCJuvkWsQ= +modernc.org/ccgo/v3 v3.16.8 h1:G0QNlTqI5uVgczBWfGKs7B++EPwCfXPWGD2MdeKloDs= +modernc.org/ccgo/v3 v3.16.8/go.mod h1:zNjwkizS+fIFDrDjIAgBSCLkWbJuHF+ar3QRn+Z9aws= +modernc.org/ccorpus v1.11.6 h1:J16RXiiqiCgua6+ZvQot4yUuUy8zxgqbqEEUuGPlISk= +modernc.org/ccorpus v1.11.6/go.mod h1:2gEUTrWqdpH2pXsmTM1ZkjeSrUWDpjMu2T6m29L/ErQ= +modernc.org/httpfs v1.0.6 h1:AAgIpFZRXuYnkjftxTAZwMIiwEqAfk8aVB2/oA6nAeM= +modernc.org/httpfs v1.0.6/go.mod h1:7dosgurJGp0sPaRanU53W4xZYKh14wfzX420oZADeHM= +modernc.org/libc v0.0.0-20220428101251-2d5f3daf273b/go.mod h1:p7Mg4+koNjc8jkqwcoFBJx7tXkpj00G77X7A72jXPXA= +modernc.org/libc v1.16.0/go.mod h1:N4LD6DBE9cf+Dzf9buBlzVJndKr/iJHG97vGLHYnb5A= +modernc.org/libc v1.16.1/go.mod h1:JjJE0eu4yeK7tab2n4S1w8tlWd9MxXLRzheaRnAKymU= +modernc.org/libc v1.16.7/go.mod h1:hYIV5VZczAmGZAnG15Vdngn5HSF5cSkbvfz2B7GRuVU= +modernc.org/libc v1.16.17/go.mod h1:hYIV5VZczAmGZAnG15Vdngn5HSF5cSkbvfz2B7GRuVU= +modernc.org/libc v1.16.19 h1:S8flPn5ZeXx6iw/8yNa986hwTQDrY8RXU7tObZuAozo= +modernc.org/libc v1.16.19/go.mod h1:p7Mg4+koNjc8jkqwcoFBJx7tXkpj00G77X7A72jXPXA= +modernc.org/mathutil v1.2.2/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= +modernc.org/mathutil v1.4.1 h1:ij3fYGe8zBF4Vu+g0oT7mB06r8sqGWKuJu1yXeR4by8= +modernc.org/mathutil v1.4.1/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= +modernc.org/memory v1.1.1 h1:bDOL0DIDLQv7bWhP3gMvIrnoFw+Eo6F7a2QK9HPDiFU= +modernc.org/memory v1.1.1/go.mod h1:/0wo5ibyrQiaoUoH7f9D8dnglAmILJ5/cxZlRECf+Nw= +modernc.org/opt v0.1.1/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0= +modernc.org/opt v0.1.3 h1:3XOZf2yznlhC+ibLltsDGzABUGVx8J6pnFMS3E4dcq4= +modernc.org/opt v0.1.3/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0= +modernc.org/sqlite v1.18.0 h1:ef66qJSgKeyLyrF4kQ2RHw/Ue3V89fyFNbGL073aDjI= +modernc.org/sqlite v1.18.0/go.mod h1:B9fRWZacNxJBHoCJZQr1R54zhVn3fjfl0aszflrTSxY= +modernc.org/strutil v1.1.1/go.mod h1:DE+MQQ/hjKBZS2zNInV5hhcipt5rLPWkmpbGeW5mmdw= +modernc.org/strutil v1.1.2 h1:iFBDH6j1Z0bN/Q9udJnnFoFpENA4252qe/7/5woE5MI= +modernc.org/strutil v1.1.2/go.mod h1:OYajnUAcI/MX+XD/Wx7v1bbdvcQSvxgtb0gC+u3d3eg= +modernc.org/tcl v1.13.1 h1:npxzTwFTZYM8ghWicVIX1cRWzj7Nd8i6AqqX2p+IYao= +modernc.org/tcl v1.13.1/go.mod h1:XOLfOwzhkljL4itZkK6T72ckMgvj0BDsnKNdZVUOecw= +modernc.org/token v1.0.0 h1:a0jaWiNMDhDUtqOj09wvjWWAqd3q7WpBulmL9H2egsk= +modernc.org/token v1.0.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= +modernc.org/z v1.5.1 h1:RTNHdsrOpeoSeOF4FbzTo8gBYByaJ5xT7NgZ9ZqRiJM= +modernc.org/z v1.5.1/go.mod h1:eWFB510QWW5Th9YGZT81s+LwvaAs3Q2yr4sP0rmLkv8= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= -sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= -sourcegraph.com/sourcegraph/appdash v0.0.0-20190731080439-ebfcffb1b5c0/go.mod h1:hI742Nqp5OhwiqlzhgfbWU4mW4yO10fP+LoT9WOswdU= 
diff --git a/go/internal/bitutils/bit_block_counter.go b/go/internal/bitutils/bit_block_counter.go index 56ce644c3a632..4c3bf3037d311 100644 --- a/go/internal/bitutils/bit_block_counter.go +++ b/go/internal/bitutils/bit_block_counter.go @@ -21,8 +21,8 @@ import ( "math/bits" "unsafe" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/internal/utils" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/internal/utils" ) func loadWord(byt []byte) uint64 { diff --git a/go/internal/bitutils/bit_block_counter_test.go b/go/internal/bitutils/bit_block_counter_test.go index db7ecb64c10c7..d7f68db70b135 100644 --- a/go/internal/bitutils/bit_block_counter_test.go +++ b/go/internal/bitutils/bit_block_counter_test.go @@ -19,9 +19,9 @@ package bitutils_test import ( "testing" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/bitutils" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/bitutils" "github.com/stretchr/testify/assert" "golang.org/x/exp/rand" ) diff --git a/go/internal/bitutils/bit_run_reader.go b/go/internal/bitutils/bit_run_reader.go index 19bc8a263fd2a..9c6412ffb4004 100644 --- a/go/internal/bitutils/bit_run_reader.go +++ b/go/internal/bitutils/bit_run_reader.go @@ -22,9 +22,9 @@ import ( "math/bits" "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/internal/utils" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/internal/utils" ) // BitRun represents a run of bits with the same value of length Len diff --git a/go/internal/bitutils/bit_run_reader_test.go b/go/internal/bitutils/bit_run_reader_test.go index 7abdc412b9692..f5b9b7858d8dd 100644 --- a/go/internal/bitutils/bit_run_reader_test.go +++ b/go/internal/bitutils/bit_run_reader_test.go @@ -21,9 +21,9 @@ import ( "testing" "unsafe" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/endian" - "github.com/apache/arrow/go/v9/internal/bitutils" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/endian" + "github.com/apache/arrow/go/v10/internal/bitutils" "github.com/stretchr/testify/assert" ) diff --git a/go/internal/bitutils/bit_set_run_reader.go b/go/internal/bitutils/bit_set_run_reader.go index 8a90a8a3f00ff..2dae0f989c79c 100644 --- a/go/internal/bitutils/bit_set_run_reader.go +++ b/go/internal/bitutils/bit_set_run_reader.go @@ -20,8 +20,8 @@ import ( "encoding/binary" "math/bits" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/internal/utils" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/internal/utils" ) // IsMultipleOf64 returns whether v is a multiple of 64. 
diff --git a/go/internal/bitutils/bit_set_run_reader_test.go b/go/internal/bitutils/bit_set_run_reader_test.go index 8401f09cc82ce..55f658b3514d0 100644 --- a/go/internal/bitutils/bit_set_run_reader_test.go +++ b/go/internal/bitutils/bit_set_run_reader_test.go @@ -20,9 +20,9 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/internal/bitutils" - "github.com/apache/arrow/go/v9/internal/utils" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/internal/bitutils" + "github.com/apache/arrow/go/v10/internal/utils" "github.com/stretchr/testify/suite" ) diff --git a/go/internal/bitutils/bitmap_generate.go b/go/internal/bitutils/bitmap_generate.go new file mode 100644 index 0000000000000..25b17bfb797d4 --- /dev/null +++ b/go/internal/bitutils/bitmap_generate.go @@ -0,0 +1,107 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package bitutils + +import "github.com/apache/arrow/go/v10/arrow/bitutil" + +// GenerateBits writes sequential bits to a bitmap. Bits preceding the +// initial start offset are preserved, bits following the bitmap may +// get clobbered. +func GenerateBits(bitmap []byte, start, length int64, g func() bool) { + if length == 0 { + return + } + + cur := bitmap[start/8:] + mask := bitutil.BitMask[start%8] + curbyte := cur[0] & bitutil.PrecedingBitmask[start%8] + + for i := int64(0); i < length; i++ { + bit := g() + if bit { + curbyte = curbyte | mask + } + mask <<= 1 + if mask == 0 { + mask = 1 + cur[0] = curbyte + cur = cur[1:] + curbyte = 0 + } + } + + if mask != 1 { + cur[0] = curbyte + } +} + +// GenerateBitsUnrolled is like GenerateBits but unrolls its main loop for +// higher performance. +// +// See the benchmarks for evidence. 
+func GenerateBitsUnrolled(bitmap []byte, start, length int64, g func() bool) { + if length == 0 { + return + } + + var ( + curbyte byte + cur = bitmap[start/8:] + startBitOffset uint64 = uint64(start % 8) + mask = bitutil.BitMask[startBitOffset] + remaining = length + ) + + if mask != 0x01 { + curbyte = cur[0] & bitutil.PrecedingBitmask[startBitOffset] + for mask != 0 && remaining > 0 { + if g() { + curbyte |= mask + } + mask <<= 1 + remaining-- + } + cur[0] = curbyte + cur = cur[1:] + } + + var outResults [8]byte + for remainingBytes := remaining / 8; remainingBytes > 0; remainingBytes-- { + for i := 0; i < 8; i++ { + if g() { + outResults[i] = 1 + } else { + outResults[i] = 0 // reset any bit left over from the previous byte + } + } + cur[0] = (outResults[0] | outResults[1]<<1 | outResults[2]<<2 | + outResults[3]<<3 | outResults[4]<<4 | outResults[5]<<5 | + outResults[6]<<6 | outResults[7]<<7) + cur = cur[1:] + } + + remainingBits := remaining % 8 + if remainingBits > 0 { + curbyte = 0 + mask = 0x01 + for ; remainingBits > 0; remainingBits-- { + if g() { + curbyte |= mask + } + mask <<= 1 + } + cur[0] = curbyte + } +} diff --git a/go/internal/bitutils/bitmap_generate_test.go b/go/internal/bitutils/bitmap_generate_test.go new file mode 100644 index 0000000000000..07700449774ca --- /dev/null +++ b/go/internal/bitutils/bitmap_generate_test.go @@ -0,0 +1,68 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package bitutils_test + +import ( + "testing" + + "github.com/apache/arrow/go/v10/internal/bitutils" + "golang.org/x/exp/rand" +) + +const kBufferSize int64 = 1024 * 8 + +var pattern = []bool{false, false, false, true, true, true} + +func runBench(b *testing.B, bitmap []byte, nbits int64, fn func([]byte, int64, int64, func() bool)) { + for n := 0; n < b.N; n++ { + patternIndex := 0 + gen := func() bool { + b := pattern[patternIndex] + patternIndex++ + if patternIndex == len(pattern) { + patternIndex = 0 + } + return b + } + + fn(bitmap, 0, nbits, gen) + } +} + +func BenchmarkGenerateBits(b *testing.B) { + nbits := kBufferSize * 8 + // random bytes + r := rand.New(rand.NewSource(0)) + bitmap := make([]byte, kBufferSize) + r.Read(bitmap) + + b.ResetTimer() + b.SetBytes(kBufferSize) + runBench(b, bitmap, nbits, bitutils.GenerateBits) +} + +func BenchmarkGenerateBitsUnrolled(b *testing.B) { + nbits := kBufferSize * 8 + // random bytes + r := rand.New(rand.NewSource(0)) + bitmap := make([]byte, kBufferSize) + r.Read(bitmap) + + b.ResetTimer() + b.SetBytes(kBufferSize) + runBench(b, bitmap, nbits, bitutils.GenerateBitsUnrolled) +} diff --git a/go/internal/hashing/xxh3_memo_table.gen.go b/go/internal/hashing/xxh3_memo_table.gen.go index 3078c4e5a15e3..57d57d970b420 100644 --- a/go/internal/hashing/xxh3_memo_table.gen.go +++ b/go/internal/hashing/xxh3_memo_table.gen.go @@ -21,9 +21,9 @@ package hashing import ( "math" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/internal/utils" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/internal/utils" ) type payloadInt8 struct { diff --git a/go/internal/hashing/xxh3_memo_table.gen.go.tmpl b/go/internal/hashing/xxh3_memo_table.gen.go.tmpl index d932d46658824..ac6d3d1468709 100644 --- a/go/internal/hashing/xxh3_memo_table.gen.go.tmpl +++ b/go/internal/hashing/xxh3_memo_table.gen.go.tmpl @@ -17,8 +17,8 @@ package hashing import ( - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/internal/utils" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/internal/utils" ) {{range .In}} diff --git a/go/internal/hashing/xxh3_memo_table.go b/go/internal/hashing/xxh3_memo_table.go index 0657be8f007dc..1a33420f4368e 100644 --- a/go/internal/hashing/xxh3_memo_table.go +++ b/go/internal/hashing/xxh3_memo_table.go @@ -26,7 +26,7 @@ import ( "reflect" "unsafe" - "github.com/apache/arrow/go/v9/parquet" + "github.com/apache/arrow/go/v10/parquet" "github.com/zeebo/xxh3" ) diff --git a/go/internal/utils/min_max_ppc64le.go b/go/internal/utils/min_max_ppc64le.go new file mode 100644 index 0000000000000..ed1b0e69343a4 --- /dev/null +++ b/go/internal/utils/min_max_ppc64le.go @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !noasm +// +build !noasm + +package utils + +func init() { + minmaxFuncs.i8 = int8MinMax + minmaxFuncs.ui8 = uint8MinMax + minmaxFuncs.i16 = int16MinMax + minmaxFuncs.ui16 = uint16MinMax + minmaxFuncs.i32 = int32MinMax + minmaxFuncs.ui32 = uint32MinMax + minmaxFuncs.i64 = int64MinMax + minmaxFuncs.ui64 = uint64MinMax +} diff --git a/go/internal/utils/transpose_ints_def.go b/go/internal/utils/transpose_ints_def.go index d18045529edc0..7d7bff7135236 100644 --- a/go/internal/utils/transpose_ints_def.go +++ b/go/internal/utils/transpose_ints_def.go @@ -19,7 +19,7 @@ package utils import ( "errors" - "github.com/apache/arrow/go/v9/arrow" + "github.com/apache/arrow/go/v10/arrow" ) //go:generate go run ../../arrow/_tools/tmpl -i -data=transpose_ints.tmpldata -d arch=avx2 transpose_ints_simd.go.tmpl=transpose_ints_avx2_amd64.go diff --git a/go/internal/utils/transpose_ints_ppc64le.go b/go/internal/utils/transpose_ints_ppc64le.go new file mode 100644 index 0000000000000..2650cebe75917 --- /dev/null +++ b/go/internal/utils/transpose_ints_ppc64le.go @@ -0,0 +1,97 @@ +// Code generated by transpose_ints_s390x.go.tmpl. DO NOT EDIT. + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build !noasm +// +build !noasm + +package utils + +// if building with the 'noasm' tag, then point to the pure go implementations +var ( + TransposeInt8Int8 = transposeInt8Int8 + TransposeInt8Uint8 = transposeInt8Uint8 + TransposeInt8Int16 = transposeInt8Int16 + TransposeInt8Uint16 = transposeInt8Uint16 + TransposeInt8Int32 = transposeInt8Int32 + TransposeInt8Uint32 = transposeInt8Uint32 + TransposeInt8Int64 = transposeInt8Int64 + TransposeInt8Uint64 = transposeInt8Uint64 + + TransposeUint8Int8 = transposeUint8Int8 + TransposeUint8Uint8 = transposeUint8Uint8 + TransposeUint8Int16 = transposeUint8Int16 + TransposeUint8Uint16 = transposeUint8Uint16 + TransposeUint8Int32 = transposeUint8Int32 + TransposeUint8Uint32 = transposeUint8Uint32 + TransposeUint8Int64 = transposeUint8Int64 + TransposeUint8Uint64 = transposeUint8Uint64 + + TransposeInt16Int8 = transposeInt16Int8 + TransposeInt16Uint8 = transposeInt16Uint8 + TransposeInt16Int16 = transposeInt16Int16 + TransposeInt16Uint16 = transposeInt16Uint16 + TransposeInt16Int32 = transposeInt16Int32 + TransposeInt16Uint32 = transposeInt16Uint32 + TransposeInt16Int64 = transposeInt16Int64 + TransposeInt16Uint64 = transposeInt16Uint64 + + TransposeUint16Int8 = transposeUint16Int8 + TransposeUint16Uint8 = transposeUint16Uint8 + TransposeUint16Int16 = transposeUint16Int16 + TransposeUint16Uint16 = transposeUint16Uint16 + TransposeUint16Int32 = transposeUint16Int32 + TransposeUint16Uint32 = transposeUint16Uint32 + TransposeUint16Int64 = transposeUint16Int64 + TransposeUint16Uint64 = transposeUint16Uint64 + + TransposeInt32Int8 = transposeInt32Int8 + TransposeInt32Uint8 = transposeInt32Uint8 + TransposeInt32Int16 = transposeInt32Int16 + TransposeInt32Uint16 = transposeInt32Uint16 + TransposeInt32Int32 = transposeInt32Int32 + TransposeInt32Uint32 = transposeInt32Uint32 + TransposeInt32Int64 = transposeInt32Int64 + TransposeInt32Uint64 = transposeInt32Uint64 + + TransposeUint32Int8 = transposeUint32Int8 + TransposeUint32Uint8 = transposeUint32Uint8 + TransposeUint32Int16 = transposeUint32Int16 + TransposeUint32Uint16 = transposeUint32Uint16 + TransposeUint32Int32 = transposeUint32Int32 + TransposeUint32Uint32 = transposeUint32Uint32 + TransposeUint32Int64 = transposeUint32Int64 + TransposeUint32Uint64 = transposeUint32Uint64 + + TransposeInt64Int8 = transposeInt64Int8 + TransposeInt64Uint8 = transposeInt64Uint8 + TransposeInt64Int16 = transposeInt64Int16 + TransposeInt64Uint16 = transposeInt64Uint16 + TransposeInt64Int32 = transposeInt64Int32 + TransposeInt64Uint32 = transposeInt64Uint32 + TransposeInt64Int64 = transposeInt64Int64 + TransposeInt64Uint64 = transposeInt64Uint64 + + TransposeUint64Int8 = transposeUint64Int8 + TransposeUint64Uint8 = transposeUint64Uint8 + TransposeUint64Int16 = transposeUint64Int16 + TransposeUint64Uint16 = transposeUint64Uint16 + TransposeUint64Int32 = transposeUint64Int32 + TransposeUint64Uint32 = transposeUint64Uint32 + TransposeUint64Int64 = transposeUint64Int64 + TransposeUint64Uint64 = transposeUint64Uint64 +) diff --git a/go/internal/utils/transpose_ints_test.go b/go/internal/utils/transpose_ints_test.go index 245ae551293ca..4c69dd1c3afb9 100644 --- a/go/internal/utils/transpose_ints_test.go +++ b/go/internal/utils/transpose_ints_test.go @@ -23,7 +23,7 @@ import ( "math/rand" "testing" - "github.com/apache/arrow/go/v9/internal/utils" + "github.com/apache/arrow/go/v10/internal/utils" ) var ( diff --git a/go/parquet/cmd/parquet_reader/dumper.go b/go/parquet/cmd/parquet_reader/dumper.go index 
340892a56634e..c58a696b2b86a 100644 --- a/go/parquet/cmd/parquet_reader/dumper.go +++ b/go/parquet/cmd/parquet_reader/dumper.go @@ -22,9 +22,9 @@ import ( "reflect" "time" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/file" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/file" + "github.com/apache/arrow/go/v10/parquet/schema" ) const defaultBatchSize = 128 diff --git a/go/parquet/cmd/parquet_reader/main.go b/go/parquet/cmd/parquet_reader/main.go index d86177a5e67f5..dfca6fbf148ca 100644 --- a/go/parquet/cmd/parquet_reader/main.go +++ b/go/parquet/cmd/parquet_reader/main.go @@ -26,10 +26,10 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/file" - "github.com/apache/arrow/go/v9/parquet/metadata" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/file" + "github.com/apache/arrow/go/v10/parquet/metadata" + "github.com/apache/arrow/go/v10/parquet/schema" "github.com/docopt/docopt-go" ) @@ -230,8 +230,12 @@ func main() { scanners := make([]*Dumper, len(selectedColumns)) fields := make([]string, len(selectedColumns)) for idx, c := range selectedColumns { - scanners[idx] = createDumper(rgr.Column(c)) - fields[idx] = rgr.Column(c).Descriptor().Path() + col, err := rgr.Column(c) + if err != nil { + log.Fatalf("unable to fetch column=%d err=%s", c, err) + } + scanners[idx] = createDumper(col) + fields[idx] = col.Descriptor().Path() } var line string @@ -283,8 +287,12 @@ func main() { if idx > 0 { fmt.Fprint(dataOut, ",") } - scanners[idx] = createDumper(rgr.Column(c)) - fmt.Fprintf(dataOut, "%q", rgr.Column(c).Descriptor().Path()) + col, err := rgr.Column(c) + if err != nil { + log.Fatalf("unable to fetch col=%d err=%s", c, err) + } + scanners[idx] = createDumper(col) + fmt.Fprintf(dataOut, "%q", col.Descriptor().Path()) } fmt.Fprintln(dataOut) @@ -334,8 +342,12 @@ func main() { scanners := make([]*Dumper, len(selectedColumns)) for idx, c := range selectedColumns { - scanners[idx] = createDumper(rgr.Column(c)) - fmt.Fprintf(dataOut, fmt.Sprintf("%%-%ds|", colwidth), rgr.Column(c).Descriptor().Name()) + col, err := rgr.Column(c) + if err != nil { + log.Fatalf("unable to fetch column=%d err=%s", c, err) + } + scanners[idx] = createDumper(col) + fmt.Fprintf(dataOut, fmt.Sprintf("%%-%ds|", colwidth), col.Descriptor().Name()) } fmt.Fprintln(dataOut) diff --git a/go/parquet/cmd/parquet_schema/main.go b/go/parquet/cmd/parquet_schema/main.go index b79d538179345..73f1ecddefd98 100644 --- a/go/parquet/cmd/parquet_schema/main.go +++ b/go/parquet/cmd/parquet_schema/main.go @@ -20,8 +20,8 @@ import ( "fmt" "os" - "github.com/apache/arrow/go/v9/parquet/file" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/parquet/file" + "github.com/apache/arrow/go/v10/parquet/schema" "github.com/docopt/docopt-go" ) diff --git a/go/parquet/compress/brotli.go b/go/parquet/compress/brotli.go index 88905ed75a39f..5494cba6064ab 100644 --- a/go/parquet/compress/brotli.go +++ b/go/parquet/compress/brotli.go @@ -22,7 +22,7 @@ import ( "io/ioutil" "github.com/andybalholm/brotli" - "github.com/apache/arrow/go/v9/parquet/internal/debug" + "github.com/apache/arrow/go/v10/parquet/internal/debug" ) type brotliCodec struct{} diff --git a/go/parquet/compress/compress.go b/go/parquet/compress/compress.go index 19f28b826ac3c..c81b8c4b1291c 100644 
--- a/go/parquet/compress/compress.go +++ b/go/parquet/compress/compress.go @@ -24,7 +24,7 @@ import ( "io" "io/ioutil" - "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" ) // Compression is an alias to the thrift compression codec enum type for easy use diff --git a/go/parquet/compress/compress_test.go b/go/parquet/compress/compress_test.go index d12d6d95361dc..181f9cf7b6823 100644 --- a/go/parquet/compress/compress_test.go +++ b/go/parquet/compress/compress_test.go @@ -22,7 +22,7 @@ import ( "math/rand" "testing" - "github.com/apache/arrow/go/v9/parquet/compress" + "github.com/apache/arrow/go/v10/parquet/compress" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/compress/zstd.go b/go/parquet/compress/zstd.go index e4ed962ef3fc2..1640a0b3cc70f 100644 --- a/go/parquet/compress/zstd.go +++ b/go/parquet/compress/zstd.go @@ -20,7 +20,7 @@ import ( "io" "sync" - "github.com/apache/arrow/go/v9/parquet/internal/debug" + "github.com/apache/arrow/go/v10/parquet/internal/debug" "github.com/klauspost/compress/zstd" ) diff --git a/go/parquet/doc.go b/go/parquet/doc.go index d4e745543e61f..7c97dd5c95028 100644 --- a/go/parquet/doc.go +++ b/go/parquet/doc.go @@ -26,9 +26,9 @@ // Install // // You can download the library and cli utilities via: -// go get -u github.com/apache/arrow/go/v9/parquet -// go install github.com/apache/arrow/go/v9/parquet/cmd/parquet_reader@latest -// go install github.com/apache/arrow/go/v9/parquet/cmd/parquet_schema@latest +// go get -u github.com/apache/arrow/go/v10/parquet +// go install github.com/apache/arrow/go/v10/parquet/cmd/parquet_reader@latest +// go install github.com/apache/arrow/go/v10/parquet/cmd/parquet_schema@latest // // Modules // diff --git a/go/parquet/encryption_properties.go b/go/parquet/encryption_properties.go index b2d7038ed2e7a..e589ddc0f5e27 100644 --- a/go/parquet/encryption_properties.go +++ b/go/parquet/encryption_properties.go @@ -20,7 +20,7 @@ import ( "crypto/rand" "unicode/utf8" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" ) // Constants that will be used as the default values with encryption/decryption diff --git a/go/parquet/encryption_properties_test.go b/go/parquet/encryption_properties_test.go index 71776bc82dee8..dee3ec15c18eb 100644 --- a/go/parquet/encryption_properties_test.go +++ b/go/parquet/encryption_properties_test.go @@ -19,8 +19,8 @@ package parquet_test import ( "testing" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/encryption" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/encryption" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/encryption_read_config_test.go b/go/parquet/encryption_read_config_test.go index f3427d15cd22b..fd18be5007f55 100644 --- a/go/parquet/encryption_read_config_test.go +++ b/go/parquet/encryption_read_config_test.go @@ -23,10 +23,10 @@ import ( "path" "testing" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/file" - "github.com/apache/arrow/go/v9/parquet/internal/encryption" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/file" + "github.com/apache/arrow/go/v10/parquet/internal/encryption" "github.com/stretchr/testify/suite" ) @@ 
-185,7 +185,10 @@ func (d *TestDecryptionSuite) decryptFile(filename string, decryptConfigNum int) rowsRead := int64(0) // get col reader for boolean column - colReader := rowGroupReader.Column(0) + colReader, err := rowGroupReader.Column(0) + if err != nil { + panic(err) + } boolReader := colReader.(*file.BooleanColumnChunkReader) // get column chunk metadata for boolean column @@ -210,7 +213,10 @@ func (d *TestDecryptionSuite) decryptFile(filename string, decryptConfigNum int) d.EqualValues(i, boolMd.NumValues()) // Get column reader for int32 column - colReader = rowGroupReader.Column(1) + colReader, err = rowGroupReader.Column(1) + if err != nil { + panic(err) + } int32reader := colReader.(*file.Int32ColumnChunkReader) int32md, _ := rgMeta.ColumnChunk(1) @@ -232,7 +238,10 @@ func (d *TestDecryptionSuite) decryptFile(filename string, decryptConfigNum int) d.EqualValues(i, int32md.NumValues()) // Get column reader for int64 column - colReader = rowGroupReader.Column(2) + colReader, err = rowGroupReader.Column(2) + if err != nil { + panic(err) + } int64reader := colReader.(*file.Int64ColumnChunkReader) int64md, _ := rgMeta.ColumnChunk(2) @@ -265,7 +274,10 @@ func (d *TestDecryptionSuite) decryptFile(filename string, decryptConfigNum int) d.EqualValues(i, int64md.NumValues()) // Get column reader for int96 column - colReader = rowGroupReader.Column(3) + colReader, err = rowGroupReader.Column(3) + if err != nil { + panic(err) + } int96reader := colReader.(*file.Int96ColumnChunkReader) int96md, _ := rgMeta.ColumnChunk(3) @@ -297,7 +309,10 @@ func (d *TestDecryptionSuite) decryptFile(filename string, decryptConfigNum int) // try to read them during the plaintext test. if props.FileDecryptProps != nil { // Get column reader for the float column - colReader = rowGroupReader.Column(4) + colReader, err = rowGroupReader.Column(4) + if err != nil { + panic(err) + } floatReader := colReader.(*file.Float32ColumnChunkReader) floatmd, _ := rgMeta.ColumnChunk(4) @@ -320,7 +335,10 @@ func (d *TestDecryptionSuite) decryptFile(filename string, decryptConfigNum int) d.EqualValues(i, floatmd.NumValues()) // Get column reader for the double column - colReader = rowGroupReader.Column(5) + colReader, err = rowGroupReader.Column(5) + if err != nil { + panic(err) + } dblReader := colReader.(*file.Float64ColumnChunkReader) dblmd, _ := rgMeta.ColumnChunk(5) @@ -343,7 +361,10 @@ func (d *TestDecryptionSuite) decryptFile(filename string, decryptConfigNum int) d.EqualValues(i, dblmd.NumValues()) } - colReader = rowGroupReader.Column(6) + colReader, err = rowGroupReader.Column(6) + if err != nil { + panic(err) + } bareader := colReader.(*file.ByteArrayColumnChunkReader) bamd, _ := rgMeta.ColumnChunk(6) diff --git a/go/parquet/encryption_write_config_test.go b/go/parquet/encryption_write_config_test.go index c8320b5fbf640..66cabaf9f85a3 100644 --- a/go/parquet/encryption_write_config_test.go +++ b/go/parquet/encryption_write_config_test.go @@ -24,10 +24,10 @@ import ( "path/filepath" "testing" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/compress" - "github.com/apache/arrow/go/v9/parquet/file" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/compress" + "github.com/apache/arrow/go/v10/parquet/file" + "github.com/apache/arrow/go/v10/parquet/schema" "github.com/stretchr/testify/suite" ) diff --git a/go/parquet/file/column_reader.go b/go/parquet/file/column_reader.go index fe962b5774e85..f0d8074588418 
100644 --- a/go/parquet/file/column_reader.go +++ b/go/parquet/file/column_reader.go @@ -18,14 +18,15 @@ package file import ( "fmt" - - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" - "github.com/apache/arrow/go/v9/parquet/internal/encryption" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v9/parquet/schema" + "sync" + + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" + "github.com/apache/arrow/go/v10/parquet/internal/encryption" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet/schema" "golang.org/x/xerrors" ) @@ -125,6 +126,7 @@ type columnChunkReader struct { // the number of values we've decoded so far numDecoded int64 mem memory.Allocator + bufferPool *sync.Pool decoders map[format.Encoding]encoding.TypedDecoder decoderTraits encoding.DecoderTraits @@ -136,8 +138,12 @@ type columnChunkReader struct { // NewColumnReader returns a column reader for the provided column initialized with the given pagereader that will // provide the pages of data for this column. The type is determined from the column passed in. -func NewColumnReader(descr *schema.Column, pageReader PageReader, mem memory.Allocator) ColumnChunkReader { - base := columnChunkReader{descr: descr, rdr: pageReader, mem: mem, decoders: make(map[format.Encoding]encoding.TypedDecoder)} +// +// In addition to the page reader and allocator, a pointer to a shared sync.Pool is expected to provide buffers for temporary +// usage to minimize allocations. The bufferPool should provide *memory.Buffer objects that can be resized as necessary, buffers +// should have `ResizeNoShrink(0)` called on them before being put back into the pool. +func NewColumnReader(descr *schema.Column, pageReader PageReader, mem memory.Allocator, bufferPool *sync.Pool) ColumnChunkReader { + base := columnChunkReader{descr: descr, rdr: pageReader, mem: mem, decoders: make(map[format.Encoding]encoding.TypedDecoder), bufferPool: bufferPool} switch descr.PhysicalType() { case parquet.Types.FixedLenByteArray: base.decoderTraits = &encoding.FixedLenByteArrayDecoderTraits @@ -273,8 +279,12 @@ func (c *columnChunkReader) initLevelDecodersV2(page *DataPageV2) (int64, error) if c.descr.MaxRepetitionLevel() > 0 { c.repetitionDecoder.SetDataV2(page.repLvlByteLen, c.descr.MaxRepetitionLevel(), int(c.numBuffered), buf) - buf = buf[page.repLvlByteLen:] } + // ARROW-17453: Some writers will write repetition levels even when + // the max repetition level is 0, so we should respect the value + // in the page header regardless of whether MaxRepetitionLevel is 0 + // or not. 
+ buf = buf[page.repLvlByteLen:] if c.descr.MaxDefinitionLevel() > 0 { c.definitionDecoder.SetDataV2(page.defLvlByteLen, c.descr.MaxDefinitionLevel(), int(c.numBuffered), buf) @@ -435,15 +445,17 @@ func (c *columnChunkReader) skipValues(nvalues int64, readFn func(batch int64, b valsRead int64 = 0 ) - // TODO(ARROW-16790): ideally we should re-use a shared pool of buffers to avoid unnecessary memory allocation for skips - scratch := memory.NewResizableBuffer(c.mem) + scratch := c.bufferPool.Get().(*memory.Buffer) + defer func() { + scratch.ResizeNoShrink(0) + c.bufferPool.Put(scratch) + }() bufMult := 1 if c.descr.PhysicalType() == parquet.Types.Boolean { // for bools, BytesRequired returns 1 byte per 8 bool, but casting []byte to []bool requires 1 byte per 1 bool bufMult = 8 } scratch.Reserve(c.decoderTraits.BytesRequired(int(batchSize) * bufMult)) - defer scratch.Release() for { batchSize = utils.Min(batchSize, toskip) diff --git a/go/parquet/file/column_reader_test.go b/go/parquet/file/column_reader_test.go index c9be7e366d130..ce7ff9e22e0e0 100755 --- a/go/parquet/file/column_reader_test.go +++ b/go/parquet/file/column_reader_test.go @@ -20,14 +20,16 @@ import ( "math" "math/rand" "reflect" + "runtime" + "sync" "testing" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/file" - "github.com/apache/arrow/go/v9/parquet/internal/testutils" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/file" + "github.com/apache/arrow/go/v10/parquet/internal/testutils" + "github.com/apache/arrow/go/v10/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) @@ -173,10 +175,25 @@ type PrimitiveReaderSuite struct { nvalues int maxDefLvl int16 maxRepLvl int16 + + bufferPool sync.Pool +} + +func (p *PrimitiveReaderSuite) SetupTest() { + p.bufferPool = sync.Pool{ + New: func() interface{} { + buf := memory.NewResizableBuffer(mem) + runtime.SetFinalizer(buf, func(obj *memory.Buffer) { + obj.Release() + }) + return buf + }, + } } func (p *PrimitiveReaderSuite) TearDownTest() { p.clear() + p.bufferPool = sync.Pool{} } func (p *PrimitiveReaderSuite) initReader(d *schema.Column) { @@ -185,7 +202,7 @@ func (p *PrimitiveReaderSuite) initReader(d *schema.Column) { m.TestData().Set("pages", p.pages) m.On("Err").Return((error)(nil)) p.pager = m - p.reader = file.NewColumnReader(d, m, mem) + p.reader = file.NewColumnReader(d, m, mem, &p.bufferPool) } func (p *PrimitiveReaderSuite) checkResults(typ reflect.Type) { @@ -514,6 +531,40 @@ func (p *PrimitiveReaderSuite) TestInt32FlatRequiredSkip() { }) } +func (p *PrimitiveReaderSuite) TestRepetitionLvlBytesWithMaxRepZero() { + const batchSize = 4 + p.maxDefLvl = 1 + p.maxRepLvl = 0 + typ := schema.NewInt32Node("a", parquet.Repetitions.Optional, -1) + descr := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl) + // Bytes here came from the example parquet file in ARROW-17453's int32 + // column which was delta bit-packed. The key part is the first three + // bytes: the page header reports 1 byte for repetition levels even + // though the max rep level is 0. If that byte isn't skipped then + // we get def levels of [1, 1, 0, 0] instead of the correct [1, 1, 1, 0]. 
+ pageData := [...]byte{0x3, 0x3, 0x7, 0x80, 0x1, 0x4, 0x3, + 0x18, 0x1, 0x2, 0x0, 0x0, 0x0, 0xc, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} + + p.pages = append(p.pages, file.NewDataPageV2(memory.NewBufferBytes(pageData[:]), batchSize, 1, batchSize, + parquet.Encodings.DeltaBinaryPacked, 2, 1, int32(len(pageData)), false)) + + p.initReader(descr) + p.NotPanics(func() { p.reader.HasNext() }) + + var ( + values [4]int32 + defLvls [4]int16 + ) + i32Rdr := p.reader.(*file.Int32ColumnChunkReader) + total, read, err := i32Rdr.ReadBatch(batchSize, values[:], defLvls[:], nil) + p.NoError(err) + p.EqualValues(batchSize, total) + p.EqualValues(3, read) + p.Equal([]int16{1, 1, 1, 0}, defLvls[:]) + p.Equal([]int32{12, 11, 13, 0}, values[:]) +} + func (p *PrimitiveReaderSuite) TestDictionaryEncodedPages() { p.maxDefLvl = 0 p.maxRepLvl = 0 diff --git a/go/parquet/file/column_reader_types.gen.go b/go/parquet/file/column_reader_types.gen.go index 6163b35b7a86c..43b60037af2a2 100644 --- a/go/parquet/file/column_reader_types.gen.go +++ b/go/parquet/file/column_reader_types.gen.go @@ -21,9 +21,9 @@ package file import ( "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" ) // Int32ColumnChunkReader is the Typed Column chunk reader instance for reading diff --git a/go/parquet/file/column_reader_types.gen.go.tmpl b/go/parquet/file/column_reader_types.gen.go.tmpl index 6a83d389c356c..783bc6c4145f6 100644 --- a/go/parquet/file/column_reader_types.gen.go.tmpl +++ b/go/parquet/file/column_reader_types.gen.go.tmpl @@ -17,8 +17,8 @@ package file import ( - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" ) {{range .In}} diff --git a/go/parquet/file/column_writer.go b/go/parquet/file/column_writer.go index 43e06c4d3ebc0..bb4975a5d482a 100755 --- a/go/parquet/file/column_writer.go +++ b/go/parquet/file/column_writer.go @@ -21,14 +21,14 @@ import ( "encoding/binary" "io" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" - "github.com/apache/arrow/go/v9/parquet/metadata" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" + "github.com/apache/arrow/go/v10/parquet/metadata" + "github.com/apache/arrow/go/v10/parquet/schema" ) //go:generate go run ../../arrow/_tools/tmpl/main.go -i -data=../internal/encoding/physical_types.tmpldata column_writer_types.gen.go.tmpl diff --git a/go/parquet/file/column_writer_test.go b/go/parquet/file/column_writer_test.go index 45e46bfb2f771..39eeb06f23c0c 100755 --- a/go/parquet/file/column_writer_test.go +++ b/go/parquet/file/column_writer_test.go @@ -20,21 +20,23 @@ import ( "bytes" "math" "reflect" + "runtime" + "sync" "testing" - "github.com/apache/arrow/go/v9/arrow/bitutil" - 
"github.com/apache/arrow/go/v9/arrow/memory" - arrutils "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/compress" - "github.com/apache/arrow/go/v9/parquet/file" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" - "github.com/apache/arrow/go/v9/parquet/internal/encryption" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/testutils" - "github.com/apache/arrow/go/v9/parquet/internal/utils" - "github.com/apache/arrow/go/v9/parquet/metadata" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" + arrutils "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/compress" + "github.com/apache/arrow/go/v10/parquet/file" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" + "github.com/apache/arrow/go/v10/parquet/internal/encryption" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/testutils" + "github.com/apache/arrow/go/v10/parquet/internal/utils" + "github.com/apache/arrow/go/v10/parquet/metadata" + "github.com/apache/arrow/go/v10/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/suite" @@ -223,6 +225,8 @@ type PrimitiveWriterTestSuite struct { metadata *metadata.ColumnChunkMetaDataBuilder sink *encoding.BufferWriter readbuffer *memory.Buffer + + bufferPool sync.Pool } func (p *PrimitiveWriterTestSuite) SetupTest() { @@ -230,12 +234,26 @@ func (p *PrimitiveWriterTestSuite) SetupTest() { p.props = parquet.NewWriterProperties() p.SetupSchema(parquet.Repetitions.Required, 1) p.descr = p.Schema.Column(0) + + p.bufferPool = sync.Pool{ + New: func() interface{} { + buf := memory.NewResizableBuffer(mem) + runtime.SetFinalizer(buf, func(obj *memory.Buffer) { + obj.Release() + }) + return buf + }, + } +} + +func (p *PrimitiveWriterTestSuite) TearDownTest() { + p.bufferPool = sync.Pool{} } func (p *PrimitiveWriterTestSuite) buildReader(nrows int64, compression compress.Compression) file.ColumnChunkReader { p.readbuffer = p.sink.Finish() pagereader, _ := file.NewPageReader(arrutils.NewBufferedReader(bytes.NewReader(p.readbuffer.Bytes()), p.readbuffer.Len()), nrows, compression, mem, nil) - return file.NewColumnReader(p.descr, pagereader, mem) + return file.NewColumnReader(p.descr, pagereader, mem, &p.bufferPool) } func (p *PrimitiveWriterTestSuite) buildWriter(_ int64, columnProps parquet.ColumnProperties, version parquet.Version) file.ColumnChunkWriter { diff --git a/go/parquet/file/column_writer_types.gen.go b/go/parquet/file/column_writer_types.gen.go index fca500e626f68..6b4a740c88fcb 100644 --- a/go/parquet/file/column_writer_types.gen.go +++ b/go/parquet/file/column_writer_types.gen.go @@ -21,10 +21,10 @@ package file import ( "fmt" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v9/parquet/metadata" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet/metadata" "golang.org/x/xerrors" ) diff --git 
a/go/parquet/file/column_writer_types.gen.go.tmpl b/go/parquet/file/column_writer_types.gen.go.tmpl index 1c4d326519fa1..4304484edec91 100644 --- a/go/parquet/file/column_writer_types.gen.go.tmpl +++ b/go/parquet/file/column_writer_types.gen.go.tmpl @@ -19,10 +19,10 @@ package file import ( "fmt" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/metadata" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/metadata" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" ) {{range .In}} diff --git a/go/parquet/file/file_reader.go b/go/parquet/file/file_reader.go index 93ce19885c520..d9a73faa63288 100644 --- a/go/parquet/file/file_reader.go +++ b/go/parquet/file/file_reader.go @@ -22,12 +22,13 @@ import ( "fmt" "io" "os" + "runtime" + "sync" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/encryption" - "github.com/apache/arrow/go/v9/parquet/metadata" - "golang.org/x/exp/mmap" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/encryption" + "github.com/apache/arrow/go/v10/parquet/metadata" "golang.org/x/xerrors" ) @@ -48,47 +49,8 @@ type Reader struct { metadata *metadata.FileMetaData footerOffset int64 fileDecryptor encryption.FileDecryptor -} - -// an adapter for mmap'd files -type mmapAdapter struct { - *mmap.ReaderAt - pos int64 -} - -func (m *mmapAdapter) Close() error { - return m.ReaderAt.Close() -} - -func (m *mmapAdapter) ReadAt(p []byte, off int64) (int, error) { - return m.ReaderAt.ReadAt(p, off) -} - -func (m *mmapAdapter) Read(p []byte) (n int, err error) { - n, err = m.ReaderAt.ReadAt(p, m.pos) - m.pos += int64(n) - return -} - -func (m *mmapAdapter) Seek(offset int64, whence int) (int64, error) { - newPos, offs := int64(0), offset - switch whence { - case io.SeekStart: - newPos = offs - case io.SeekCurrent: - newPos = m.pos + offs - case io.SeekEnd: - newPos = int64(m.ReaderAt.Len()) + offs - } - if newPos < 0 { - return 0, xerrors.New("negative result pos") - } - if newPos > int64(m.ReaderAt.Len()) { - return 0, xerrors.New("new position exceeds size of file") - } - m.pos = newPos - return newPos, nil + bufferPool sync.Pool } type ReadOption func(*Reader) @@ -119,11 +81,10 @@ func OpenParquetFile(filename string, memoryMap bool, opts ...ReadOption) (*Read var err error if memoryMap { - rdr, err := mmap.Open(filename) + source, err = mmapOpen(filename) if err != nil { return nil, err } - source = &mmapAdapter{rdr, 0} } else { source, err = os.Open(filename) if err != nil { @@ -156,6 +117,16 @@ func NewParquetReader(r parquet.ReaderAtSeeker, opts ...ReadOption) (*Reader, er f.props = parquet.NewReaderProperties(memory.NewGoAllocator()) } + f.bufferPool = sync.Pool{ + New: func() interface{} { + buf := memory.NewResizableBuffer(f.props.Allocator()) + runtime.SetFinalizer(buf, func(obj *memory.Buffer) { + obj.Release() + }) + return buf + }, + } + if f.metadata == nil { return f, f.parseMetaData() } @@ -163,6 +134,14 @@ func NewParquetReader(r parquet.ReaderAtSeeker, opts ...ReadOption) (*Reader, er return f, nil } +// BufferPool returns the internal buffer pool being utilized by this reader. 
+// This is primarily for use by the pqarrow.FileReader or anything that builds +// on top of the Reader and constructs their own ColumnReaders (like the +// RecordReader) +func (f *Reader) BufferPool() *sync.Pool { + return &f.bufferPool +} + // Close will close the current reader, and if the underlying reader being used // is an `io.Closer` then Close will be called on it too. func (f *Reader) Close() error { @@ -333,5 +312,6 @@ func (f *Reader) RowGroup(i int) *RowGroupReader { r: f.r, sourceSz: f.footerOffset, fileDecryptor: f.fileDecryptor, + bufferPool: &f.bufferPool, } } diff --git a/go/parquet/file/file_reader_mmap.go b/go/parquet/file/file_reader_mmap.go new file mode 100644 index 0000000000000..0890ba5eb0660 --- /dev/null +++ b/go/parquet/file/file_reader_mmap.go @@ -0,0 +1,77 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !windows +// +build !windows + +package file + +import ( + "io" + + "github.com/apache/arrow/go/v10/parquet" + "golang.org/x/exp/mmap" + "golang.org/x/xerrors" +) + +func mmapOpen(filename string) (parquet.ReaderAtSeeker, error) { + rdr, err := mmap.Open(filename) + if err != nil { + return nil, err + } + return &mmapAdapter{rdr, 0}, nil +} + +// an adapter for mmap'd files +type mmapAdapter struct { + *mmap.ReaderAt + + pos int64 +} + +func (m *mmapAdapter) Close() error { + return m.ReaderAt.Close() +} + +func (m *mmapAdapter) ReadAt(p []byte, off int64) (int, error) { + return m.ReaderAt.ReadAt(p, off) +} + +func (m *mmapAdapter) Read(p []byte) (n int, err error) { + n, err = m.ReaderAt.ReadAt(p, m.pos) + m.pos += int64(n) + return +} + +func (m *mmapAdapter) Seek(offset int64, whence int) (int64, error) { + newPos, offs := int64(0), offset + switch whence { + case io.SeekStart: + newPos = offs + case io.SeekCurrent: + newPos = m.pos + offs + case io.SeekEnd: + newPos = int64(m.ReaderAt.Len()) + offs + } + if newPos < 0 { + return 0, xerrors.New("negative result pos") + } + if newPos > int64(m.ReaderAt.Len()) { + return 0, xerrors.New("new position exceeds size of file") + } + m.pos = newPos + return newPos, nil +} diff --git a/go/parquet/file/file_reader_mmap_windows.go b/go/parquet/file/file_reader_mmap_windows.go new file mode 100644 index 0000000000000..b0d6162afbb7e --- /dev/null +++ b/go/parquet/file/file_reader_mmap_windows.go @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build windows +// +build windows + +package file + +import ( + "errors" + + "github.com/apache/arrow/go/v10/parquet" +) + +func mmapOpen(filename string) (parquet.ReaderAtSeeker, error) { + return nil, errors.New("mmap not implemented on windows") +} diff --git a/go/parquet/file/file_reader_test.go b/go/parquet/file/file_reader_test.go index f0588ed55b709..d1706e5608470 100644 --- a/go/parquet/file/file_reader_test.go +++ b/go/parquet/file/file_reader_test.go @@ -23,14 +23,14 @@ import ( "math/rand" "testing" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet/compress" - "github.com/apache/arrow/go/v9/parquet/file" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/thrift" - "github.com/apache/arrow/go/v9/parquet/metadata" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet/compress" + "github.com/apache/arrow/go/v10/parquet/file" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/thrift" + "github.com/apache/arrow/go/v10/parquet/metadata" libthrift "github.com/apache/thrift/lib/go/thrift" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" diff --git a/go/parquet/file/file_writer.go b/go/parquet/file/file_writer.go index a7ba78215b9a3..5f2f1d30074ae 100644 --- a/go/parquet/file/file_writer.go +++ b/go/parquet/file/file_writer.go @@ -20,11 +20,11 @@ import ( "encoding/binary" "io" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/encryption" - "github.com/apache/arrow/go/v9/parquet/internal/utils" - "github.com/apache/arrow/go/v9/parquet/metadata" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/encryption" + "github.com/apache/arrow/go/v10/parquet/internal/utils" + "github.com/apache/arrow/go/v10/parquet/metadata" + "github.com/apache/arrow/go/v10/parquet/schema" ) // Writer is the primary interface for writing a parquet file diff --git a/go/parquet/file/file_writer_test.go b/go/parquet/file/file_writer_test.go index e7d9deca6e141..477c35ffb1080 100644 --- a/go/parquet/file/file_writer_test.go +++ b/go/parquet/file/file_writer_test.go @@ -21,13 +21,13 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/compress" - "github.com/apache/arrow/go/v9/parquet/file" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" - "github.com/apache/arrow/go/v9/parquet/internal/testutils" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/compress" + 
"github.com/apache/arrow/go/v10/parquet/file" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" + "github.com/apache/arrow/go/v10/parquet/internal/testutils" + "github.com/apache/arrow/go/v10/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) @@ -115,7 +115,8 @@ func (t *SerializeTestSuite) fileSerializeTest(codec compress.Compression, expec t.False(chunk.HasIndexPage()) t.DefLevelsOut = make([]int16, t.rowsPerRG) t.RepLevelsOut = make([]int16, t.rowsPerRG) - colReader := rgr.Column(i) + colReader, err := rgr.Column(i) + t.NoError(err) t.SetupValuesOut(int64(t.rowsPerRG)) valuesRead = t.ReadBatch(colReader, int64(t.rowsPerRG), 0, t.DefLevelsOut, t.RepLevelsOut) t.EqualValues(t.rowsPerRG, valuesRead) @@ -310,7 +311,9 @@ func TestBufferedMultiPageDisabledDictionary(t *testing.T) { assert.EqualValues(t, valueCount, rgr.NumRows()) var totalRead int64 - colReader := rgr.Column(0).(*file.Int32ColumnChunkReader) + col, err := rgr.Column(0) + assert.NoError(t, err) + colReader := col.(*file.Int32ColumnChunkReader) for colReader.HasNext() { total, _, _ := colReader.ReadBatch(valueCount-totalRead, valuesOut[totalRead:], nil, nil) totalRead += total @@ -350,7 +353,9 @@ func TestAllNulls(t *testing.T) { assert.NoError(t, err) rgr := reader.RowGroup(0) - cr := rgr.Column(0).(*file.Int32ColumnChunkReader) + col, err := rgr.Column(0) + assert.NoError(t, err) + cr := col.(*file.Int32ColumnChunkReader) defLevels[0] = -1 defLevels[1] = -1 diff --git a/go/parquet/file/level_conversion.go b/go/parquet/file/level_conversion.go index e1a3401177fe0..af2029f4a305e 100755 --- a/go/parquet/file/level_conversion.go +++ b/go/parquet/file/level_conversion.go @@ -22,11 +22,11 @@ import ( "math/bits" "unsafe" - shared_utils "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/bmi" - "github.com/apache/arrow/go/v9/parquet/internal/utils" - "github.com/apache/arrow/go/v9/parquet/schema" + shared_utils "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/bmi" + "github.com/apache/arrow/go/v10/parquet/internal/utils" + "github.com/apache/arrow/go/v10/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/file/level_conversion_test.go b/go/parquet/file/level_conversion_test.go index ee69266081cbf..58298228074b3 100644 --- a/go/parquet/file/level_conversion_test.go +++ b/go/parquet/file/level_conversion_test.go @@ -20,9 +20,9 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/parquet/internal/bmi" - "github.com/apache/arrow/go/v9/parquet/internal/utils" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/parquet/internal/bmi" + "github.com/apache/arrow/go/v10/parquet/internal/utils" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/file/page_reader.go b/go/parquet/file/page_reader.go index dc6a1f10aea74..71feeeb3f5e1c 100644 --- a/go/parquet/file/page_reader.go +++ b/go/parquet/file/page_reader.go @@ -23,13 +23,13 @@ import ( "sync" "github.com/JohnCGriffin/overflow" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/compress" - "github.com/apache/arrow/go/v9/parquet/internal/encryption" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" - 
"github.com/apache/arrow/go/v9/parquet/internal/thrift" - "github.com/apache/arrow/go/v9/parquet/metadata" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/compress" + "github.com/apache/arrow/go/v10/parquet/internal/encryption" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/thrift" + "github.com/apache/arrow/go/v10/parquet/metadata" "golang.org/x/xerrors" ) @@ -573,30 +573,27 @@ func (p *serializedPageReader) Next() bool { return false } - var pagebuf *memory.Buffer if compressed { if levelsBytelen > 0 { io.ReadFull(p.r, buf.Bytes()[:levelsBytelen]) } - var data []byte - if data, p.err = p.decompress(lenCompressed-levelsBytelen, buf.Bytes()[levelsBytelen:]); p.err != nil { + if _, p.err = p.decompress(lenCompressed-levelsBytelen, buf.Bytes()[levelsBytelen:]); p.err != nil { return false } - pagebuf = memory.NewBufferBytes(data) } else { io.ReadFull(p.r, buf.Bytes()) - pagebuf = buf - pagebuf.Retain() } - if pagebuf.Len() != lenUncompressed { - p.err = fmt.Errorf("parquet: metadata said %d bytes uncompressed data page, got %d bytes", lenUncompressed, pagebuf.Len()) + buf.Retain() + + if buf.Len() != lenUncompressed { + p.err = fmt.Errorf("parquet: metadata said %d bytes uncompressed data page, got %d bytes", lenUncompressed, buf.Len()) return false } // make datapage v2 p.curPage = &DataPageV2{ page: page{ - buf: pagebuf, + buf: buf, typ: p.curPageHdr.Type, nvals: dataHeader.GetNumValues(), encoding: dataHeader.GetEncoding(), diff --git a/go/parquet/file/page_writer.go b/go/parquet/file/page_writer.go index a74f4df443149..916e0eac55a0d 100644 --- a/go/parquet/file/page_writer.go +++ b/go/parquet/file/page_writer.go @@ -20,15 +20,15 @@ import ( "bytes" "sync" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/compress" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" - "github.com/apache/arrow/go/v9/parquet/internal/encryption" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/thrift" - "github.com/apache/arrow/go/v9/parquet/internal/utils" - "github.com/apache/arrow/go/v9/parquet/metadata" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/compress" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" + "github.com/apache/arrow/go/v10/parquet/internal/encryption" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/thrift" + "github.com/apache/arrow/go/v10/parquet/internal/utils" + "github.com/apache/arrow/go/v10/parquet/metadata" libthrift "github.com/apache/thrift/lib/go/thrift" "golang.org/x/xerrors" ) diff --git a/go/parquet/file/record_reader.go b/go/parquet/file/record_reader.go index 24be4dcd04051..3e45ee915fecf 100755 --- a/go/parquet/file/record_reader.go +++ b/go/parquet/file/record_reader.go @@ -18,18 +18,19 @@ package file import ( "fmt" + "sync" "sync/atomic" "unsafe" "github.com/JohnCGriffin/overflow" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" - 
"github.com/apache/arrow/go/v9/parquet/internal/encoding" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" + "github.com/apache/arrow/go/v10/parquet/schema" "golang.org/x/xerrors" ) @@ -127,9 +128,9 @@ type primitiveRecordReader struct { useValues bool } -func createPrimitiveRecordReader(descr *schema.Column, mem memory.Allocator) primitiveRecordReader { +func createPrimitiveRecordReader(descr *schema.Column, mem memory.Allocator, bufferPool *sync.Pool) primitiveRecordReader { return primitiveRecordReader{ - ColumnChunkReader: NewColumnReader(descr, nil, mem), + ColumnChunkReader: NewColumnReader(descr, nil, mem, bufferPool), values: memory.NewResizableBuffer(mem), validBits: memory.NewResizableBuffer(mem), mem: mem, @@ -326,12 +327,12 @@ func (b *binaryRecordReader) GetBuilderChunks() []arrow.Array { return b.recordReaderImpl.(binaryRecordReaderImpl).GetBuilderChunks() } -func newRecordReader(descr *schema.Column, info LevelInfo, mem memory.Allocator) RecordReader { +func newRecordReader(descr *schema.Column, info LevelInfo, mem memory.Allocator, bufferPool *sync.Pool) RecordReader { if mem == nil { mem = memory.DefaultAllocator } - pr := createPrimitiveRecordReader(descr, mem) + pr := createPrimitiveRecordReader(descr, mem, bufferPool) return &recordReader{ refCount: 1, recordReaderImpl: &pr, @@ -722,7 +723,7 @@ func (fr *flbaRecordReader) GetBuilderChunks() []arrow.Array { return []arrow.Array{fr.bldr.NewArray()} } -func newFLBARecordReader(descr *schema.Column, info LevelInfo, mem memory.Allocator) RecordReader { +func newFLBARecordReader(descr *schema.Column, info LevelInfo, mem memory.Allocator, bufferPool *sync.Pool) RecordReader { if mem == nil { mem = memory.DefaultAllocator } @@ -731,7 +732,7 @@ func newFLBARecordReader(descr *schema.Column, info LevelInfo, mem memory.Alloca return &binaryRecordReader{&recordReader{ recordReaderImpl: &flbaRecordReader{ - createPrimitiveRecordReader(descr, mem), + createPrimitiveRecordReader(descr, mem, bufferPool), array.NewFixedSizeBinaryBuilder(mem, &arrow.FixedSizeBinaryType{ByteWidth: byteWidth}), nil, }, @@ -750,7 +751,7 @@ type byteArrayRecordReader struct { valueBuf []parquet.ByteArray } -func newByteArrayRecordReader(descr *schema.Column, info LevelInfo, mem memory.Allocator) RecordReader { +func newByteArrayRecordReader(descr *schema.Column, info LevelInfo, mem memory.Allocator, bufferPool *sync.Pool) RecordReader { if mem == nil { mem = memory.DefaultAllocator } @@ -762,7 +763,7 @@ func newByteArrayRecordReader(descr *schema.Column, info LevelInfo, mem memory.A return &binaryRecordReader{&recordReader{ recordReaderImpl: &byteArrayRecordReader{ - createPrimitiveRecordReader(descr, mem), + createPrimitiveRecordReader(descr, mem, bufferPool), array.NewBinaryBuilder(mem, dt), nil, }, @@ -840,13 +841,13 @@ func (br *byteArrayRecordReader) GetBuilderChunks() []arrow.Array { // TODO(mtopol): create optimized readers for dictionary types after ARROW-7286 is done -func NewRecordReader(descr *schema.Column, info LevelInfo, readDict bool, mem memory.Allocator) RecordReader { +func NewRecordReader(descr *schema.Column, info LevelInfo, readDict bool, mem memory.Allocator, bufferPool *sync.Pool) RecordReader { switch 
descr.PhysicalType() { case parquet.Types.ByteArray: - return newByteArrayRecordReader(descr, info, mem) + return newByteArrayRecordReader(descr, info, mem, bufferPool) case parquet.Types.FixedLenByteArray: - return newFLBARecordReader(descr, info, mem) + return newFLBARecordReader(descr, info, mem, bufferPool) default: - return newRecordReader(descr, info, mem) + return newRecordReader(descr, info, mem, bufferPool) } } diff --git a/go/parquet/file/row_group_reader.go b/go/parquet/file/row_group_reader.go index 5d383dd28c8f5..b2b5bcf15581f 100644 --- a/go/parquet/file/row_group_reader.go +++ b/go/parquet/file/row_group_reader.go @@ -18,11 +18,12 @@ package file import ( "fmt" + "sync" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/encryption" - "github.com/apache/arrow/go/v9/parquet/metadata" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/encryption" + "github.com/apache/arrow/go/v10/parquet/metadata" "golang.org/x/xerrors" ) @@ -38,6 +39,8 @@ type RowGroupReader struct { rgMetadata *metadata.RowGroupMetaData props *parquet.ReaderProperties fileDecryptor encryption.FileDecryptor + + bufferPool *sync.Pool } // MetaData returns the metadata of the current Row Group @@ -55,17 +58,17 @@ func (r *RowGroupReader) ByteSize() int64 { return r.rgMetadata.TotalByteSize() // Column returns a column reader for the requested (0-indexed) column // // panics if passed a column not in the range [0, NumColumns) -func (r *RowGroupReader) Column(i int) ColumnChunkReader { +func (r *RowGroupReader) Column(i int) (ColumnChunkReader, error) { if i >= r.NumColumns() || i < 0 { - panic(fmt.Errorf("parquet: trying to read column index %d but row group metadata only has %d columns", i, r.rgMetadata.NumColumns())) + return nil, fmt.Errorf("parquet: trying to read column index %d but row group metadata only has %d columns", i, r.rgMetadata.NumColumns()) } descr := r.fileMetadata.Schema.Column(i) pageRdr, err := r.GetColumnPageReader(i) if err != nil { - panic(fmt.Errorf("parquet: unable to initialize page reader: %w", err)) + return nil, fmt.Errorf("parquet: unable to initialize page reader: %w", err) } - return NewColumnReader(descr, pageRdr, r.props.Allocator()) + return NewColumnReader(descr, pageRdr, r.props.Allocator(), r.bufferPool), nil } func (r *RowGroupReader) GetColumnPageReader(i int) (PageReader, error) { diff --git a/go/parquet/file/row_group_writer.go b/go/parquet/file/row_group_writer.go index c4af39c64c338..043ffa85f7f05 100644 --- a/go/parquet/file/row_group_writer.go +++ b/go/parquet/file/row_group_writer.go @@ -17,10 +17,10 @@ package file import ( - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/encryption" - "github.com/apache/arrow/go/v9/parquet/internal/utils" - "github.com/apache/arrow/go/v9/parquet/metadata" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/encryption" + "github.com/apache/arrow/go/v10/parquet/internal/utils" + "github.com/apache/arrow/go/v10/parquet/metadata" "golang.org/x/xerrors" ) diff --git a/go/parquet/file/row_group_writer_test.go b/go/parquet/file/row_group_writer_test.go index 8943ff7574fa9..b0b223e23b3f9 100644 --- a/go/parquet/file/row_group_writer_test.go +++ b/go/parquet/file/row_group_writer_test.go @@ -20,10 +20,10 @@ import ( "bytes" "testing" - 
"github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet/file" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet/file" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" + "github.com/apache/arrow/go/v10/parquet/schema" "github.com/apache/thrift/lib/go/thrift" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/internal/bmi/bitmap_bmi2_ppc64le.go b/go/parquet/internal/bmi/bitmap_bmi2_ppc64le.go new file mode 100644 index 0000000000000..498d5452e17ad --- /dev/null +++ b/go/parquet/internal/bmi/bitmap_bmi2_ppc64le.go @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !noasm + +package bmi + +func init() { + funclist.extractBits = extractBitsGo + funclist.gtbitmap = greaterThanBitmapGo +} diff --git a/go/parquet/internal/encoding/boolean_decoder.go b/go/parquet/internal/encoding/boolean_decoder.go index aa4bbbd993959..21b1e2dfcb842 100644 --- a/go/parquet/internal/encoding/boolean_decoder.go +++ b/go/parquet/internal/encoding/boolean_decoder.go @@ -17,10 +17,10 @@ package encoding import ( - "github.com/apache/arrow/go/v9/arrow/bitutil" - shared_utils "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/utils" + "github.com/apache/arrow/go/v10/arrow/bitutil" + shared_utils "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/utils" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encoding/boolean_encoder.go b/go/parquet/internal/encoding/boolean_encoder.go index 0c55ddd689827..02bfbb2e6ac89 100644 --- a/go/parquet/internal/encoding/boolean_encoder.go +++ b/go/parquet/internal/encoding/boolean_encoder.go @@ -17,9 +17,9 @@ package encoding import ( - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/utils" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/utils" ) const ( diff --git a/go/parquet/internal/encoding/byte_array_decoder.go b/go/parquet/internal/encoding/byte_array_decoder.go index 2d6a4d06a9558..228de70455727 100644 --- a/go/parquet/internal/encoding/byte_array_decoder.go +++ b/go/parquet/internal/encoding/byte_array_decoder.go @@ -19,8 +19,8 @@ package encoding import ( "encoding/binary" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" + "github.com/apache/arrow/go/v10/internal/utils" + 
"github.com/apache/arrow/go/v10/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encoding/byte_array_encoder.go b/go/parquet/internal/encoding/byte_array_encoder.go index e41f835afd2d4..777d927c76eab 100644 --- a/go/parquet/internal/encoding/byte_array_encoder.go +++ b/go/parquet/internal/encoding/byte_array_encoder.go @@ -20,10 +20,10 @@ import ( "encoding/binary" "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/internal/bitutils" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/internal/bitutils" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" ) // PlainByteArrayEncoder encodes byte arrays according to the spec for Plain encoding diff --git a/go/parquet/internal/encoding/decoder.go b/go/parquet/internal/encoding/decoder.go index a9da6444667db..11ab21807c51b 100644 --- a/go/parquet/internal/encoding/decoder.go +++ b/go/parquet/internal/encoding/decoder.go @@ -20,13 +20,13 @@ import ( "bytes" "reflect" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/bitutils" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/debug" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/utils" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/bitutils" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/debug" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/utils" + "github.com/apache/arrow/go/v10/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encoding/delta_bit_packing.go b/go/parquet/internal/encoding/delta_bit_packing.go index 9b1190e9188a0..f008cc795d52a 100644 --- a/go/parquet/internal/encoding/delta_bit_packing.go +++ b/go/parquet/internal/encoding/delta_bit_packing.go @@ -22,11 +22,11 @@ import ( "math/bits" "reflect" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/memory" - shared_utils "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/utils" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/memory" + shared_utils "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/utils" "golang.org/x/xerrors" ) @@ -50,7 +50,8 @@ type deltaBitPackDecoder struct { deltaBitWidths *memory.Buffer deltaBitWidth byte - lastVal int64 + totalValues uint64 + lastVal int64 } // returns the number of bytes read so far @@ -85,15 +86,10 @@ func (d *deltaBitPackDecoder) SetData(nvalues int, data []byte) error { return xerrors.New("parquet: eof exception") } - var totalValues uint64 - if totalValues, ok = d.bitdecoder.GetVlqInt(); !ok { + if d.totalValues, ok = d.bitdecoder.GetVlqInt(); !ok { return xerrors.New("parquet: eof exception") } - if int(totalValues) != d.nvals { - return xerrors.New("parquet: mismatch between number of values and count in data header") - } - if d.lastVal, ok = d.bitdecoder.GetZigZagVlqInt(); !ok { return xerrors.New("parquet: eof exception") } diff --git 
a/go/parquet/internal/encoding/delta_byte_array.go b/go/parquet/internal/encoding/delta_byte_array.go index 1f573de4ebb18..4a250a90ba9c2 100644 --- a/go/parquet/internal/encoding/delta_byte_array.go +++ b/go/parquet/internal/encoding/delta_byte_array.go @@ -17,9 +17,9 @@ package encoding import ( - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encoding/delta_length_byte_array.go b/go/parquet/internal/encoding/delta_length_byte_array.go index 02b2c35816d26..01566fcfc839a 100644 --- a/go/parquet/internal/encoding/delta_length_byte_array.go +++ b/go/parquet/internal/encoding/delta_length_byte_array.go @@ -17,9 +17,9 @@ package encoding import ( - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encoding/encoder.go b/go/parquet/internal/encoding/encoder.go index 61eda39fc7cc2..88fbd204b0fab 100644 --- a/go/parquet/internal/encoding/encoder.go +++ b/go/parquet/internal/encoding/encoder.go @@ -20,14 +20,14 @@ import ( "math/bits" "reflect" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/bitutils" - "github.com/apache/arrow/go/v9/parquet" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/utils" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/bitutils" + "github.com/apache/arrow/go/v10/parquet" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/utils" + "github.com/apache/arrow/go/v10/parquet/schema" ) //go:generate go run ../../../arrow/_tools/tmpl/main.go -i -data=physical_types.tmpldata plain_encoder_types.gen.go.tmpl typed_encoder.gen.go.tmpl diff --git a/go/parquet/internal/encoding/encoding_benchmarks_test.go b/go/parquet/internal/encoding/encoding_benchmarks_test.go index c44a0d91a88a7..fd1285b4d9953 100644 --- a/go/parquet/internal/encoding/encoding_benchmarks_test.go +++ b/go/parquet/internal/encoding/encoding_benchmarks_test.go @@ -21,14 +21,14 @@ import ( "math" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/hashing" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" - "github.com/apache/arrow/go/v9/parquet/internal/testutils" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/hashing" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" + 
"github.com/apache/arrow/go/v10/parquet/internal/testutils" + "github.com/apache/arrow/go/v10/parquet/schema" ) const ( diff --git a/go/parquet/internal/encoding/encoding_test.go b/go/parquet/internal/encoding/encoding_test.go index e7c9eea39b15c..eb95a2da5b970 100644 --- a/go/parquet/internal/encoding/encoding_test.go +++ b/go/parquet/internal/encoding/encoding_test.go @@ -22,13 +22,13 @@ import ( "testing" "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" - "github.com/apache/arrow/go/v9/parquet/internal/testutils" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" + "github.com/apache/arrow/go/v10/parquet/internal/testutils" + "github.com/apache/arrow/go/v10/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) diff --git a/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go b/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go index 2fafb6546959d..425756d044b34 100644 --- a/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go +++ b/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go @@ -19,8 +19,8 @@ package encoding import ( "math" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go b/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go index e65b5260f017c..84aabf546d9ac 100644 --- a/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go +++ b/go/parquet/internal/encoding/fixed_len_byte_array_encoder.go @@ -17,8 +17,8 @@ package encoding import ( - "github.com/apache/arrow/go/v9/internal/bitutils" - "github.com/apache/arrow/go/v9/parquet" + "github.com/apache/arrow/go/v10/internal/bitutils" + "github.com/apache/arrow/go/v10/parquet" ) // PlainFixedLenByteArrayEncoder writes the raw bytes of the byte array diff --git a/go/parquet/internal/encoding/levels.go b/go/parquet/internal/encoding/levels.go index c180b804c4001..cf5a282916854 100644 --- a/go/parquet/internal/encoding/levels.go +++ b/go/parquet/internal/encoding/levels.go @@ -24,11 +24,11 @@ import ( "math/bits" "github.com/JohnCGriffin/overflow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - shared_utils "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/utils" + "github.com/apache/arrow/go/v10/arrow/bitutil" + shared_utils "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/utils" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encoding/levels_test.go b/go/parquet/internal/encoding/levels_test.go index 0658c2258e2b4..0260bbe676562 100644 --- a/go/parquet/internal/encoding/levels_test.go +++ b/go/parquet/internal/encoding/levels_test.go @@ -21,11 +21,11 @@ 
import ( "strconv" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/internal/encoding/memo_table.go b/go/parquet/internal/encoding/memo_table.go index 6c4dca7f3e15e..6848d2a38bb95 100644 --- a/go/parquet/internal/encoding/memo_table.go +++ b/go/parquet/internal/encoding/memo_table.go @@ -20,11 +20,11 @@ import ( "math" "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/hashing" - "github.com/apache/arrow/go/v9/parquet" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/hashing" + "github.com/apache/arrow/go/v10/parquet" ) //go:generate go run ../../../arrow/_tools/tmpl/main.go -i -data=physical_types.tmpldata memo_table_types.gen.go.tmpl diff --git a/go/parquet/internal/encoding/memo_table_test.go b/go/parquet/internal/encoding/memo_table_test.go index c8011be68f2ee..1f213eba77c33 100644 --- a/go/parquet/internal/encoding/memo_table_test.go +++ b/go/parquet/internal/encoding/memo_table_test.go @@ -20,11 +20,11 @@ import ( "math" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/hashing" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/hashing" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" "github.com/stretchr/testify/suite" ) diff --git a/go/parquet/internal/encoding/memo_table_types.gen.go b/go/parquet/internal/encoding/memo_table_types.gen.go index d21ad7d9676a6..6f4ceec51d9e3 100644 --- a/go/parquet/internal/encoding/memo_table_types.gen.go +++ b/go/parquet/internal/encoding/memo_table_types.gen.go @@ -19,8 +19,8 @@ package encoding import ( - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet" ) // standard map based implementation of memo tables which can be more efficient diff --git a/go/parquet/internal/encoding/memo_table_types.gen.go.tmpl b/go/parquet/internal/encoding/memo_table_types.gen.go.tmpl index 98bd6c8223e35..47cc6d2780fc4 100644 --- a/go/parquet/internal/encoding/memo_table_types.gen.go.tmpl +++ b/go/parquet/internal/encoding/memo_table_types.gen.go.tmpl @@ -17,7 +17,7 @@ package encoding import ( - "github.com/apache/arrow/go/v9/parquet" + "github.com/apache/arrow/go/v10/parquet" ) // standard map based implementation of memo tables which can be more efficient diff --git a/go/parquet/internal/encoding/plain_encoder_types.gen.go b/go/parquet/internal/encoding/plain_encoder_types.gen.go index e316a8084951c..a82fffb02ba24 100644 --- 
a/go/parquet/internal/encoding/plain_encoder_types.gen.go +++ b/go/parquet/internal/encoding/plain_encoder_types.gen.go @@ -24,11 +24,11 @@ import ( "fmt" "math" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/endian" - "github.com/apache/arrow/go/v9/internal/bitutils" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/endian" + "github.com/apache/arrow/go/v10/internal/bitutils" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl b/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl index 7cb3f9c1d9c00..4fbb685816ced 100644 --- a/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl +++ b/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl @@ -20,10 +20,10 @@ import ( "encoding/binary" "fmt" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/internal/bitutils" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/internal/bitutils" ) var ( diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go b/go/parquet/internal/encoding/typed_encoder.gen.go index bff22a3141530..92d738ac4126b 100644 --- a/go/parquet/internal/encoding/typed_encoder.gen.go +++ b/go/parquet/internal/encoding/typed_encoder.gen.go @@ -21,14 +21,14 @@ package encoding import ( "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/bitutils" - shared_utils "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/utils" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/bitutils" + shared_utils "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/utils" + "github.com/apache/arrow/go/v10/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl index 3326934c7efd2..c73dfe5530115 100644 --- a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl +++ b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl @@ -17,13 +17,13 @@ package encoding import ( - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/schema" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/parquet/internal/utils" - shared_utils "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/internal/bitutils" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/schema" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/arrow" + 
"github.com/apache/arrow/go/v10/parquet/internal/utils" + shared_utils "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/internal/bitutils" ) // fully typed encoder interfaces to enable writing against encoder/decoders diff --git a/go/parquet/internal/encoding/types.go b/go/parquet/internal/encoding/types.go index 947e0ddb438fd..1536f7e7c6fa7 100644 --- a/go/parquet/internal/encoding/types.go +++ b/go/parquet/internal/encoding/types.go @@ -20,10 +20,10 @@ import ( "io" "sync" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/encryption/aes.go b/go/parquet/internal/encryption/aes.go index f09a7f1f2fa05..8806ef8cb467b 100644 --- a/go/parquet/internal/encryption/aes.go +++ b/go/parquet/internal/encryption/aes.go @@ -29,7 +29,7 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v9/parquet" + "github.com/apache/arrow/go/v10/parquet" ) // important constants for handling the aes encryption diff --git a/go/parquet/internal/encryption/decryptor.go b/go/parquet/internal/encryption/decryptor.go index 96df49a89f06a..31e49efa9d0d9 100644 --- a/go/parquet/internal/encryption/decryptor.go +++ b/go/parquet/internal/encryption/decryptor.go @@ -19,8 +19,8 @@ package encryption import ( "io" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet" ) // FileDecryptor is an interface used by the filereader for decrypting an diff --git a/go/parquet/internal/encryption/encryptor.go b/go/parquet/internal/encryption/encryptor.go index ce438d48bb54a..fa2f9e843d5b0 100644 --- a/go/parquet/internal/encryption/encryptor.go +++ b/go/parquet/internal/encryption/encryptor.go @@ -19,8 +19,8 @@ package encryption import ( "io" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet" ) // FileEncryptor is the interface for constructing encryptors for the different diff --git a/go/parquet/internal/testutils/pagebuilder.go b/go/parquet/internal/testutils/pagebuilder.go index b157c10f8a4e4..79e75c1c34aca 100644 --- a/go/parquet/internal/testutils/pagebuilder.go +++ b/go/parquet/internal/testutils/pagebuilder.go @@ -22,13 +22,13 @@ import ( "io" "reflect" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/compress" - "github.com/apache/arrow/go/v9/parquet/file" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/compress" + "github.com/apache/arrow/go/v10/parquet/file" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" + "github.com/apache/arrow/go/v10/parquet/schema" "github.com/stretchr/testify/mock" ) diff --git a/go/parquet/internal/testutils/primitive_typed.go 
b/go/parquet/internal/testutils/primitive_typed.go index 9c36c14650318..9c97d82a050dc 100644 --- a/go/parquet/internal/testutils/primitive_typed.go +++ b/go/parquet/internal/testutils/primitive_typed.go @@ -20,11 +20,11 @@ import ( "fmt" "reflect" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/file" - "github.com/apache/arrow/go/v9/parquet/metadata" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/file" + "github.com/apache/arrow/go/v10/parquet/metadata" + "github.com/apache/arrow/go/v10/parquet/schema" ) type PrimitiveTypedTest struct { diff --git a/go/parquet/internal/testutils/random.go b/go/parquet/internal/testutils/random.go index 10df31fb2a118..2028c7f296217 100644 --- a/go/parquet/internal/testutils/random.go +++ b/go/parquet/internal/testutils/random.go @@ -24,13 +24,13 @@ import ( "time" "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/endian" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/pqarrow" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/endian" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/pqarrow" "golang.org/x/exp/rand" "gonum.org/v1/gonum/stat/distuv" diff --git a/go/parquet/internal/testutils/random_arrow.go b/go/parquet/internal/testutils/random_arrow.go index 50cea76ff1103..0213e9c975b5a 100644 --- a/go/parquet/internal/testutils/random_arrow.go +++ b/go/parquet/internal/testutils/random_arrow.go @@ -17,9 +17,9 @@ package testutils import ( - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" "golang.org/x/exp/rand" ) diff --git a/go/parquet/internal/testutils/utils.go b/go/parquet/internal/testutils/utils.go index 423f68e4e88bc..8d214cf7c24f0 100644 --- a/go/parquet/internal/testutils/utils.go +++ b/go/parquet/internal/testutils/utils.go @@ -19,7 +19,7 @@ package testutils import ( "reflect" - "github.com/apache/arrow/go/v9/parquet" + "github.com/apache/arrow/go/v10/parquet" ) var typeToParquetTypeMap = map[reflect.Type]parquet.Type{ diff --git a/go/parquet/internal/thrift/helpers.go b/go/parquet/internal/thrift/helpers.go index 68b040f7ef024..5298bc2af11fc 100644 --- a/go/parquet/internal/thrift/helpers.go +++ b/go/parquet/internal/thrift/helpers.go @@ -23,7 +23,7 @@ import ( "context" "io" - "github.com/apache/arrow/go/v9/parquet/internal/encryption" + "github.com/apache/arrow/go/v10/parquet/internal/encryption" "github.com/apache/thrift/lib/go/thrift" ) diff --git a/go/parquet/internal/utils/bit_benchmark_test.go b/go/parquet/internal/utils/bit_benchmark_test.go index 09cce7416c817..a1547f2fb4d10 100644 --- a/go/parquet/internal/utils/bit_benchmark_test.go +++ b/go/parquet/internal/utils/bit_benchmark_test.go @@ -20,9 +20,9 @@ import ( "strconv" "testing" - "github.com/apache/arrow/go/v9/arrow/bitutil" - 
"github.com/apache/arrow/go/v9/internal/bitutils" - "github.com/apache/arrow/go/v9/parquet/internal/testutils" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/internal/bitutils" + "github.com/apache/arrow/go/v10/parquet/internal/testutils" ) type linearBitRunReader struct { diff --git a/go/parquet/internal/utils/bit_packing_ppc64le.go b/go/parquet/internal/utils/bit_packing_ppc64le.go new file mode 100644 index 0000000000000..58f869c3f5d97 --- /dev/null +++ b/go/parquet/internal/utils/bit_packing_ppc64le.go @@ -0,0 +1,23 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !noasm + +package utils + +import "io" + +var unpack32 func(io.Reader, []uint32, int) int = unpack32Default diff --git a/go/parquet/internal/utils/bit_reader.go b/go/parquet/internal/utils/bit_reader.go index 345479f34a434..07056bdafffe8 100644 --- a/go/parquet/internal/utils/bit_reader.go +++ b/go/parquet/internal/utils/bit_reader.go @@ -24,10 +24,10 @@ import ( "reflect" "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/utils" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/utils" ) // masks for grabbing the trailing bits based on the number of trailing bits desired diff --git a/go/parquet/internal/utils/bit_reader_test.go b/go/parquet/internal/utils/bit_reader_test.go index 3c50b6070b2d8..4cb2cb565640e 100644 --- a/go/parquet/internal/utils/bit_reader_test.go +++ b/go/parquet/internal/utils/bit_reader_test.go @@ -25,11 +25,11 @@ import ( "strconv" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet/internal/utils" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet/internal/utils" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" "golang.org/x/exp/rand" diff --git a/go/parquet/internal/utils/bit_writer.go b/go/parquet/internal/utils/bit_writer.go index b0742eef415cc..298f8c5875689 100644 --- a/go/parquet/internal/utils/bit_writer.go +++ b/go/parquet/internal/utils/bit_writer.go @@ -21,7 +21,7 @@ import ( "io" "log" - "github.com/apache/arrow/go/v9/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/bitutil" ) // WriterAtBuffer is a convenience struct for providing a WriteAt function diff 
--git a/go/parquet/internal/utils/bitmap_writer.go b/go/parquet/internal/utils/bitmap_writer.go index 4c93e1d0aed20..4aafa48d4e243 100644 --- a/go/parquet/internal/utils/bitmap_writer.go +++ b/go/parquet/internal/utils/bitmap_writer.go @@ -20,7 +20,7 @@ import ( "encoding/binary" "math/bits" - "github.com/apache/arrow/go/v9/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/bitutil" ) // BitmapWriter is an interface for bitmap writers so that we can use multiple diff --git a/go/parquet/internal/utils/bitmap_writer_test.go b/go/parquet/internal/utils/bitmap_writer_test.go index 9ea55745366d0..968c7aefbfab4 100644 --- a/go/parquet/internal/utils/bitmap_writer_test.go +++ b/go/parquet/internal/utils/bitmap_writer_test.go @@ -22,8 +22,8 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/parquet/internal/utils" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/parquet/internal/utils" "github.com/stretchr/testify/suite" ) diff --git a/go/parquet/internal/utils/rle.go b/go/parquet/internal/utils/rle.go index a6ffaed57c5c8..1415474e1485b 100644 --- a/go/parquet/internal/utils/rle.go +++ b/go/parquet/internal/utils/rle.go @@ -25,10 +25,10 @@ import ( "io" "math" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/internal/bitutils" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/internal/bitutils" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/utils/typed_rle_dict.gen.go b/go/parquet/internal/utils/typed_rle_dict.gen.go index e482c62f37f98..4629b18df2bd4 100644 --- a/go/parquet/internal/utils/typed_rle_dict.gen.go +++ b/go/parquet/internal/utils/typed_rle_dict.gen.go @@ -19,9 +19,9 @@ package utils import ( - "github.com/apache/arrow/go/v9/internal/bitutils" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" + "github.com/apache/arrow/go/v10/internal/bitutils" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl b/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl index b4a77c8d1af1e..79e411fafd644 100644 --- a/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl +++ b/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl @@ -17,9 +17,9 @@ package utils import ( - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/internal/bitutils" - "github.com/apache/arrow/go/v9/internal/utils" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/internal/bitutils" + "github.com/apache/arrow/go/v10/internal/utils" ) {{range .In}} diff --git a/go/parquet/internal/utils/unpack_bool_ppc64le.go b/go/parquet/internal/utils/unpack_bool_ppc64le.go new file mode 100644 index 0000000000000..d833c2b9d6280 --- /dev/null +++ b/go/parquet/internal/utils/unpack_bool_ppc64le.go @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !noasm + +package utils + +// BytesToBools when built with the noasm tag will direct to the pure go implementation +// for converting a bitmap to a slice of bools +func BytesToBools(in []byte, out []bool) { + bytesToBoolsGo(in, out) +} diff --git a/go/parquet/metadata/app_version.go b/go/parquet/metadata/app_version.go index 6b605cb977a97..b6938617ee6ad 100644 --- a/go/parquet/metadata/app_version.go +++ b/go/parquet/metadata/app_version.go @@ -21,8 +21,8 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/schema" ) var ( diff --git a/go/parquet/metadata/column_chunk.go b/go/parquet/metadata/column_chunk.go index 88e32cc92f327..98b1a4fb6b700 100644 --- a/go/parquet/metadata/column_chunk.go +++ b/go/parquet/metadata/column_chunk.go @@ -22,13 +22,13 @@ import ( "io" "reflect" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/compress" - "github.com/apache/arrow/go/v9/parquet/internal/encryption" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/thrift" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/compress" + "github.com/apache/arrow/go/v10/parquet/internal/encryption" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/thrift" + "github.com/apache/arrow/go/v10/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/metadata/file.go b/go/parquet/metadata/file.go index 0a8e2c7bdd162..f5b504094189e 100644 --- a/go/parquet/metadata/file.go +++ b/go/parquet/metadata/file.go @@ -24,12 +24,12 @@ import ( "reflect" "unicode/utf8" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/compress" - "github.com/apache/arrow/go/v9/parquet/internal/encryption" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/thrift" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/compress" + "github.com/apache/arrow/go/v10/parquet/internal/encryption" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/thrift" + "github.com/apache/arrow/go/v10/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/metadata/metadata_test.go b/go/parquet/metadata/metadata_test.go index 2e11b5bce9422..6c64de749c728 100644 --- a/go/parquet/metadata/metadata_test.go +++ b/go/parquet/metadata/metadata_test.go @@ -21,9 +21,9 @@ import ( "testing" "unsafe" - 
"github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/metadata" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/metadata" + "github.com/apache/arrow/go/v10/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/parquet/metadata/row_group.go b/go/parquet/metadata/row_group.go index 61f030f3915e7..fb9a8cc9035bf 100644 --- a/go/parquet/metadata/row_group.go +++ b/go/parquet/metadata/row_group.go @@ -20,10 +20,10 @@ import ( "fmt" "reflect" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/encryption" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/encryption" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet/schema" ) // RowGroupMetaData is a proxy around the thrift RowGroup meta data object diff --git a/go/parquet/metadata/stat_compare_test.go b/go/parquet/metadata/stat_compare_test.go index 6b6b26d6467ab..81095babb6424 100644 --- a/go/parquet/metadata/stat_compare_test.go +++ b/go/parquet/metadata/stat_compare_test.go @@ -20,8 +20,8 @@ import ( "encoding/binary" "testing" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/parquet/metadata/statistics.go b/go/parquet/metadata/statistics.go index 71265c0ec1dc8..7a87494c706a3 100644 --- a/go/parquet/metadata/statistics.go +++ b/go/parquet/metadata/statistics.go @@ -22,14 +22,14 @@ import ( "math" "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/debug" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/debug" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet/schema" ) //go:generate go run ../../arrow/_tools/tmpl/main.go -i -data=../internal/encoding/physical_types.tmpldata statistics_types.gen.go.tmpl diff --git a/go/parquet/metadata/statistics_test.go b/go/parquet/metadata/statistics_test.go index 531b632b4b3dc..d0c52c9b32600 100644 --- a/go/parquet/metadata/statistics_test.go +++ b/go/parquet/metadata/statistics_test.go @@ -21,11 +21,11 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/metadata" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" + 
"github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/metadata" + "github.com/apache/arrow/go/v10/parquet/schema" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/metadata/statistics_types.gen.go b/go/parquet/metadata/statistics_types.gen.go index 5fc2033586d4c..53779db50cc90 100644 --- a/go/parquet/metadata/statistics_types.gen.go +++ b/go/parquet/metadata/statistics_types.gen.go @@ -22,13 +22,13 @@ import ( "fmt" "math" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/bitutils" - shared_utils "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/bitutils" + shared_utils "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" + "github.com/apache/arrow/go/v10/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/metadata/statistics_types.gen.go.tmpl b/go/parquet/metadata/statistics_types.gen.go.tmpl index 5aa19676fb20b..78064a6988d05 100644 --- a/go/parquet/metadata/statistics_types.gen.go.tmpl +++ b/go/parquet/metadata/statistics_types.gen.go.tmpl @@ -19,12 +19,12 @@ package metadata import ( "fmt" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/schema" - "github.com/apache/arrow/go/v9/parquet/internal/utils" - shared_utils "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" - "github.com/apache/arrow/go/v9/internal/bitutils" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/schema" + "github.com/apache/arrow/go/v10/parquet/internal/utils" + shared_utils "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" + "github.com/apache/arrow/go/v10/internal/bitutils" ) {{range .In}} diff --git a/go/parquet/pqarrow/column_readers.go b/go/parquet/pqarrow/column_readers.go index 68de9054a2606..73577b616ee63 100644 --- a/go/parquet/pqarrow/column_readers.go +++ b/go/parquet/pqarrow/column_readers.go @@ -20,19 +20,20 @@ import ( "encoding/binary" "fmt" "reflect" + "sync" "sync/atomic" "time" "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/decimal128" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/file" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/file" + "github.com/apache/arrow/go/v10/parquet/schema" "golang.org/x/sync/errgroup" "golang.org/x/xerrors" ) @@ -50,13 +51,13 @@ type leafReader struct { refCount int64 } -func newLeafReader(rctx *readerCtx, field *arrow.Field, input *columnIterator, 
leafInfo file.LevelInfo, props ArrowReadProperties) (*ColumnReader, error) { +func newLeafReader(rctx *readerCtx, field *arrow.Field, input *columnIterator, leafInfo file.LevelInfo, props ArrowReadProperties, bufferPool *sync.Pool) (*ColumnReader, error) { ret := &leafReader{ rctx: rctx, field: field, input: input, descr: input.Descr(), - recordRdr: file.NewRecordReader(input.Descr(), leafInfo, field.Type.ID() == arrow.DICTIONARY, rctx.mem), + recordRdr: file.NewRecordReader(input.Descr(), leafInfo, field.Type.ID() == arrow.DICTIONARY, rctx.mem, bufferPool), props: props, refCount: 1, } diff --git a/go/parquet/pqarrow/encode_arrow.go b/go/parquet/pqarrow/encode_arrow.go index e662d2bd5e127..ba20a3318f1a7 100644 --- a/go/parquet/pqarrow/encode_arrow.go +++ b/go/parquet/pqarrow/encode_arrow.go @@ -24,14 +24,14 @@ import ( "time" "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/decimal128" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/file" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/file" "golang.org/x/xerrors" ) diff --git a/go/parquet/pqarrow/encode_arrow_test.go b/go/parquet/pqarrow/encode_arrow_test.go index 7f799e2338749..c9aeb19c4a28d 100644 --- a/go/parquet/pqarrow/encode_arrow_test.go +++ b/go/parquet/pqarrow/encode_arrow_test.go @@ -26,20 +26,20 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/bitutil" - "github.com/apache/arrow/go/v9/arrow/decimal128" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/bitutils" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/compress" - "github.com/apache/arrow/go/v9/parquet/file" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" - "github.com/apache/arrow/go/v9/parquet/internal/testutils" - "github.com/apache/arrow/go/v9/parquet/pqarrow" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/bitutils" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/compress" + "github.com/apache/arrow/go/v10/parquet/file" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" + "github.com/apache/arrow/go/v10/parquet/internal/testutils" + "github.com/apache/arrow/go/v10/parquet/pqarrow" + "github.com/apache/arrow/go/v10/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" @@ -165,13 +165,14 @@ func TestWriteArrowCols(t *testing.T) { var ( total int64 read int - err error defLevelsOut = make([]int16, 
int(expected.NumRows())) arr = expected.Column(i).Data().Chunk(0) ) switch expected.Schema().Field(i).Type.(arrow.FixedWidthDataType).BitWidth() { case 32: - colReader := rgr.Column(i).(*file.Int32ColumnChunkReader) + col, err := rgr.Column(i) + assert.NoError(t, err) + colReader := col.(*file.Int32ColumnChunkReader) vals := make([]int32, int(expected.NumRows())) total, read, err = colReader.ReadBatch(expected.NumRows(), vals, defLevelsOut, nil) require.NoError(t, err) @@ -191,7 +192,9 @@ func TestWriteArrowCols(t *testing.T) { } } case 64: - colReader := rgr.Column(i).(*file.Int64ColumnChunkReader) + col, err := rgr.Column(i) + assert.NoError(t, err) + colReader := col.(*file.Int64ColumnChunkReader) vals := make([]int64, int(expected.NumRows())) total, read, err = colReader.ReadBatch(expected.NumRows(), vals, defLevelsOut, nil) require.NoError(t, err) @@ -258,7 +261,8 @@ func TestWriteArrowInt96(t *testing.T) { assert.EqualValues(t, 1, reader.NumRowGroups()) rgr := reader.RowGroup(0) - tsRdr := rgr.Column(3) + tsRdr, err := rgr.Column(3) + assert.NoError(t, err) assert.Equal(t, parquet.Types.Int96, tsRdr.Type()) rdr := tsRdr.(*file.Int96ColumnChunkReader) diff --git a/go/parquet/pqarrow/file_reader.go b/go/parquet/pqarrow/file_reader.go index 5a8205bbf5691..f62b4571b8e9e 100755 --- a/go/parquet/pqarrow/file_reader.go +++ b/go/parquet/pqarrow/file_reader.go @@ -23,13 +23,13 @@ import ( "sync" "sync/atomic" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/arrio" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/file" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/arrio" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/file" + "github.com/apache/arrow/go/v10/parquet/schema" "golang.org/x/sync/errgroup" "golang.org/x/xerrors" ) @@ -210,7 +210,7 @@ func (fr *FileReader) GetFieldReaders(ctx context.Context, colIndices, rowGroups // greatly improves performance. // GetFieldReader causes read operations, when issued serially on large numbers of columns, // this is super time consuming. Get field readers concurrently. - g,gctx := errgroup.WithContext(ctx) + g, gctx := errgroup.WithContext(ctx) if !fr.Props.Parallel { g.SetLimit(1) } @@ -482,7 +482,7 @@ func (fr *FileReader) getReader(ctx context.Context, field *SchemaField, arrowFi return nil, nil } - out, err = newLeafReader(&rctx, field.Field, rctx.colFactory(field.ColIndex, rctx.rdr), field.LevelInfo, fr.Props) + out, err = newLeafReader(&rctx, field.Field, rctx.colFactory(field.ColIndex, rctx.rdr), field.LevelInfo, fr.Props, fr.rdr.BufferPool()) return } @@ -499,7 +499,7 @@ func (fr *FileReader) getReader(ctx context.Context, field *SchemaField, arrowFi // When reading structs with large numbers of columns, the serial load is very slow. // This is especially true when reading Cloud Storage. Loading concurrently // greatly improves performance. 
- g,gctx := errgroup.WithContext(ctx) + g, gctx := errgroup.WithContext(ctx) if !fr.Props.Parallel { g.SetLimit(1) } diff --git a/go/parquet/pqarrow/file_reader_test.go b/go/parquet/pqarrow/file_reader_test.go index 0a3ed623c92b7..416bf8169b09d 100644 --- a/go/parquet/pqarrow/file_reader_test.go +++ b/go/parquet/pqarrow/file_reader_test.go @@ -24,13 +24,13 @@ import ( "path/filepath" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/decimal128" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/file" - "github.com/apache/arrow/go/v9/parquet/pqarrow" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/file" + "github.com/apache/arrow/go/v10/parquet/pqarrow" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/parquet/pqarrow/file_writer.go b/go/parquet/pqarrow/file_writer.go index 96a29abba3a8b..9a44b7f08f731 100644 --- a/go/parquet/pqarrow/file_writer.go +++ b/go/parquet/pqarrow/file_writer.go @@ -22,12 +22,12 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/flight" - "github.com/apache/arrow/go/v9/internal/utils" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/file" - "github.com/apache/arrow/go/v9/parquet/metadata" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/flight" + "github.com/apache/arrow/go/v10/internal/utils" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/file" + "github.com/apache/arrow/go/v10/parquet/metadata" "golang.org/x/xerrors" ) diff --git a/go/parquet/pqarrow/path_builder.go b/go/parquet/pqarrow/path_builder.go index 2c551731aef4b..692834736d516 100644 --- a/go/parquet/pqarrow/path_builder.go +++ b/go/parquet/pqarrow/path_builder.go @@ -20,11 +20,11 @@ import ( "sync/atomic" "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/internal/bitutils" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/bitutils" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" "golang.org/x/xerrors" ) diff --git a/go/parquet/pqarrow/path_builder_test.go b/go/parquet/pqarrow/path_builder_test.go index 832b0bc2aa49e..3be692845aae8 100644 --- a/go/parquet/pqarrow/path_builder_test.go +++ b/go/parquet/pqarrow/path_builder_test.go @@ -20,9 +20,9 @@ import ( "context" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/parquet/pqarrow/properties.go b/go/parquet/pqarrow/properties.go index 4a2556e668025..a994bf7d2650a 100755 --- a/go/parquet/pqarrow/properties.go +++ 
b/go/parquet/pqarrow/properties.go @@ -19,9 +19,9 @@ package pqarrow import ( "context" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet/internal/encoding" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet/internal/encoding" ) // ArrowWriterProperties are used to determine how to manipulate the arrow data diff --git a/go/parquet/pqarrow/reader_writer_test.go b/go/parquet/pqarrow/reader_writer_test.go index a9d825ff66272..91dd6b6b7ec15 100644 --- a/go/parquet/pqarrow/reader_writer_test.go +++ b/go/parquet/pqarrow/reader_writer_test.go @@ -22,12 +22,12 @@ import ( "testing" "unsafe" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/array" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/file" - "github.com/apache/arrow/go/v9/parquet/pqarrow" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/file" + "github.com/apache/arrow/go/v10/parquet/pqarrow" "golang.org/x/exp/rand" "gonum.org/v1/gonum/stat/distuv" ) diff --git a/go/parquet/pqarrow/schema.go b/go/parquet/pqarrow/schema.go index cae564ba6dd6e..d5ad304b2d9e8 100644 --- a/go/parquet/pqarrow/schema.go +++ b/go/parquet/pqarrow/schema.go @@ -23,13 +23,13 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/flight" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/file" - "github.com/apache/arrow/go/v9/parquet/metadata" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/flight" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/file" + "github.com/apache/arrow/go/v10/parquet/metadata" + "github.com/apache/arrow/go/v10/parquet/schema" "golang.org/x/xerrors" ) diff --git a/go/parquet/pqarrow/schema_test.go b/go/parquet/pqarrow/schema_test.go index ae17ec057f39a..c05d2792d8c9d 100644 --- a/go/parquet/pqarrow/schema_test.go +++ b/go/parquet/pqarrow/schema_test.go @@ -20,13 +20,13 @@ import ( "encoding/base64" "testing" - "github.com/apache/arrow/go/v9/arrow" - "github.com/apache/arrow/go/v9/arrow/flight" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/metadata" - "github.com/apache/arrow/go/v9/parquet/pqarrow" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/flight" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/metadata" + "github.com/apache/arrow/go/v10/parquet/pqarrow" + "github.com/apache/arrow/go/v10/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) diff --git a/go/parquet/reader_properties.go b/go/parquet/reader_properties.go index a7ddd631f45ee..0e80118b93282 100644 --- a/go/parquet/reader_properties.go +++ b/go/parquet/reader_properties.go @@ -21,8 +21,8 @@ import ( "fmt" "io" - "github.com/apache/arrow/go/v9/arrow/memory" - 
"github.com/apache/arrow/go/v9/internal/utils" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/internal/utils" ) // ReaderProperties are used to define how the file reader will handle buffering and allocating buffers diff --git a/go/parquet/reader_writer_properties_test.go b/go/parquet/reader_writer_properties_test.go index d500a8814a605..d666c248f26ff 100644 --- a/go/parquet/reader_writer_properties_test.go +++ b/go/parquet/reader_writer_properties_test.go @@ -20,9 +20,9 @@ import ( "bytes" "testing" - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/compress" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/compress" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/schema/column.go b/go/parquet/schema/column.go index 463648425ea94..14a3bfd3318f9 100644 --- a/go/parquet/schema/column.go +++ b/go/parquet/schema/column.go @@ -20,8 +20,8 @@ import ( "fmt" "strings" - "github.com/apache/arrow/go/v9/parquet" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" ) // Column encapsulates the information necessary to interpret primitive diff --git a/go/parquet/schema/converted_types.go b/go/parquet/schema/converted_types.go index 7c9f52528b6e1..27f5e3fb8f85c 100644 --- a/go/parquet/schema/converted_types.go +++ b/go/parquet/schema/converted_types.go @@ -17,7 +17,7 @@ package schema import ( - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" ) // ConvertedType corresponds to the ConvertedType in the parquet.Thrift, diff --git a/go/parquet/schema/converted_types_test.go b/go/parquet/schema/converted_types_test.go index 7000b1be453a4..6eb0238bf197d 100644 --- a/go/parquet/schema/converted_types_test.go +++ b/go/parquet/schema/converted_types_test.go @@ -19,7 +19,7 @@ package schema_test import ( "testing" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/parquet/schema" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/schema/helpers.go b/go/parquet/schema/helpers.go index a0ba2cc779d7e..656825e7fc51b 100644 --- a/go/parquet/schema/helpers.go +++ b/go/parquet/schema/helpers.go @@ -17,7 +17,7 @@ package schema import ( - "github.com/apache/arrow/go/v9/parquet" + "github.com/apache/arrow/go/v10/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/schema/helpers_test.go b/go/parquet/schema/helpers_test.go index ba8d012919020..ea5c732350637 100644 --- a/go/parquet/schema/helpers_test.go +++ b/go/parquet/schema/helpers_test.go @@ -21,8 +21,8 @@ import ( "strings" "testing" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/schema" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/schema/logical_types.go b/go/parquet/schema/logical_types.go index ed279a9c99fb7..b9ebd5f13a26d 100644 --- a/go/parquet/schema/logical_types.go +++ b/go/parquet/schema/logical_types.go @@ -21,9 +21,9 @@ import ( "fmt" "math" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/internal/debug" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" + 
"github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/internal/debug" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" ) // DecimalMetadata is a struct for managing scale and precision information between diff --git a/go/parquet/schema/logical_types_test.go b/go/parquet/schema/logical_types_test.go index 081454867479b..8f80d2025304d 100644 --- a/go/parquet/schema/logical_types_test.go +++ b/go/parquet/schema/logical_types_test.go @@ -20,8 +20,8 @@ import ( "encoding/json" "testing" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/schema" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/schema/node.go b/go/parquet/schema/node.go index 7ce9e8d2bcb08..6e2feae85965a 100644 --- a/go/parquet/schema/node.go +++ b/go/parquet/schema/node.go @@ -19,8 +19,8 @@ package schema import ( "fmt" - "github.com/apache/arrow/go/v9/parquet" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" "github.com/apache/thrift/lib/go/thrift" "golang.org/x/xerrors" ) diff --git a/go/parquet/schema/reflection.go b/go/parquet/schema/reflection.go index 0fbf1d6ae4606..f6495c72f039c 100644 --- a/go/parquet/schema/reflection.go +++ b/go/parquet/schema/reflection.go @@ -22,8 +22,8 @@ import ( "strconv" "strings" - "github.com/apache/arrow/go/v9/parquet" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/schema/reflection_test.go b/go/parquet/schema/reflection_test.go index deb9c92aeb1aa..1fbec030cf726 100644 --- a/go/parquet/schema/reflection_test.go +++ b/go/parquet/schema/reflection_test.go @@ -22,8 +22,8 @@ import ( "reflect" "testing" - "github.com/apache/arrow/go/v9/parquet" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/parquet" + "github.com/apache/arrow/go/v10/parquet/schema" "github.com/stretchr/testify/assert" ) diff --git a/go/parquet/schema/schema.go b/go/parquet/schema/schema.go index 82aaa4060ca91..210c16cebd150 100644 --- a/go/parquet/schema/schema.go +++ b/go/parquet/schema/schema.go @@ -35,8 +35,8 @@ import ( "io" "strings" - "github.com/apache/arrow/go/v9/parquet" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" "golang.org/x/xerrors" ) diff --git a/go/parquet/schema/schema_element_test.go b/go/parquet/schema/schema_element_test.go index 5f50d7b42ec89..bf48ad924c583 100644 --- a/go/parquet/schema/schema_element_test.go +++ b/go/parquet/schema/schema_element_test.go @@ -19,8 +19,8 @@ package schema import ( "testing" - "github.com/apache/arrow/go/v9/parquet" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) diff --git a/go/parquet/schema/schema_flatten_test.go b/go/parquet/schema/schema_flatten_test.go index d6b7c1e38adbb..1dda48147db61 100644 --- a/go/parquet/schema/schema_flatten_test.go +++ 
b/go/parquet/schema/schema_flatten_test.go @@ -19,8 +19,8 @@ package schema import ( "testing" - "github.com/apache/arrow/go/v9/parquet" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" ) diff --git a/go/parquet/schema/schema_test.go b/go/parquet/schema/schema_test.go index a3efe1c85cb2a..1dec79c4dcfa0 100644 --- a/go/parquet/schema/schema_test.go +++ b/go/parquet/schema/schema_test.go @@ -20,9 +20,9 @@ import ( "os" "testing" - "github.com/apache/arrow/go/v9/parquet" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" - "github.com/apache/arrow/go/v9/parquet/schema" + "github.com/apache/arrow/go/v10/parquet" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/parquet/schema" "github.com/apache/thrift/lib/go/thrift" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" diff --git a/go/parquet/types.go b/go/parquet/types.go index 92db0e72f7d3d..cbdcf162f7df4 100644 --- a/go/parquet/types.go +++ b/go/parquet/types.go @@ -24,8 +24,8 @@ import ( "time" "unsafe" - "github.com/apache/arrow/go/v9/arrow" - format "github.com/apache/arrow/go/v9/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/v10/arrow" + format "github.com/apache/arrow/go/v10/parquet/internal/gen-go/parquet" ) const ( diff --git a/go/parquet/writer_properties.go b/go/parquet/writer_properties.go index 123cf212eb22b..0534746d70f77 100644 --- a/go/parquet/writer_properties.go +++ b/go/parquet/writer_properties.go @@ -17,8 +17,8 @@ package parquet import ( - "github.com/apache/arrow/go/v9/arrow/memory" - "github.com/apache/arrow/go/v9/parquet/compress" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/parquet/compress" ) // Constants for default property values used for the default reader, writer and column props. @@ -46,7 +46,7 @@ const ( DefaultStatsEnabled = true // If the stats are larger than 4K the writer will skip writing them out anyways. 
DefaultMaxStatsSize int64 = 4096 - DefaultCreatedBy = "parquet-go version 9.0.0-SNAPSHOT" + DefaultCreatedBy = "parquet-go version 10.0.0-SNAPSHOT" DefaultRootName = "schema" ) diff --git a/java/.gitignore b/java/.gitignore index f2b9d4a018d97..59c2e7b2a0c6d 100644 --- a/java/.gitignore +++ b/java/.gitignore @@ -25,4 +25,4 @@ target/ !/c/ # Generated properties file -flight/flight-jdbc-driver/src/main/resources/properties/flight.properties +flight/flight-sql-jdbc-driver/src/main/resources/properties/flight.properties diff --git a/java/adapter/avro/pom.xml b/java/adapter/avro/pom.xml index ac6eed6a191f4..9ca5d044c8efc 100644 --- a/java/adapter/avro/pom.xml +++ b/java/adapter/avro/pom.xml @@ -16,7 +16,7 @@ org.apache.arrow arrow-java-root - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT ../../pom.xml diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/AvroTestBase.java b/java/adapter/avro/src/test/java/org/apache/arrow/AvroTestBase.java index a00cd7704d482..16d8e52722c44 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/AvroTestBase.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/AvroTestBase.java @@ -63,8 +63,9 @@ public void init() { } protected Schema getSchema(String schemaName) throws Exception { - Path schemaPath = Paths.get(TestWriteReadAvroRecord.class.getResource("/").getPath(), + Path schemaPath = Paths.get(Paths.get(TestWriteReadAvroRecord.class.getResource("/").toURI()).toString(), "schema", schemaName); + return new Schema.Parser().parse(schemaPath.toFile()); } diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/TestWriteReadAvroRecord.java b/java/adapter/avro/src/test/java/org/apache/arrow/TestWriteReadAvroRecord.java index bf695d193e430..0a153a28cbc2e 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/TestWriteReadAvroRecord.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/TestWriteReadAvroRecord.java @@ -48,7 +48,9 @@ public class TestWriteReadAvroRecord { public void testWriteAndRead() throws Exception { File dataFile = TMP.newFile(); - Path schemaPath = Paths.get(TestWriteReadAvroRecord.class.getResource("/").getPath(), "schema", "test.avsc"); + Path schemaPath = Paths.get( + Paths.get(TestWriteReadAvroRecord.class.getResource("/").toURI()).toString(), + "schema", "test.avsc"); Schema schema = new Schema.Parser().parse(schemaPath.toFile()); //write data to disk diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml index 8e5829a284297..aaadda0375f74 100644 --- a/java/adapter/jdbc/pom.xml +++ b/java/adapter/jdbc/pom.xml @@ -16,7 +16,7 @@ org.apache.arrow arrow-java-root - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT ../../pom.xml diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java index 6496ca5a311e9..dc708724043d0 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java @@ -44,6 +44,8 @@ import org.apache.arrow.vector.TinyIntVector; import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.types.pojo.ArrowType; /** @@ -78,7 +80,7 @@ public ColumnBinder visit(ArrowType.Struct type) { @Override public ColumnBinder 
visit(ArrowType.List type) { - throw new UnsupportedOperationException("No column binder implemented for type " + type); + return new ListBinder((ListVector) vector); } @Override @@ -98,7 +100,7 @@ public ColumnBinder visit(ArrowType.Union type) { @Override public ColumnBinder visit(ArrowType.Map type) { - throw new UnsupportedOperationException("No column binder implemented for type " + type); + return new MapBinder((MapVector) vector); } @Override diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ListBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ListBinder.java new file mode 100644 index 0000000000000..b8aa61234f4e9 --- /dev/null +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ListBinder.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.binder; + +import java.lang.reflect.Array; +import java.util.ArrayList; +import java.util.Arrays; + +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.impl.UnionListReader; +import org.apache.arrow.vector.util.Text; + +/** + * A column binder for list of primitive values. + */ +public class ListBinder extends BaseColumnBinder { + + private final UnionListReader listReader; + private final Class arrayElementClass; + private final boolean isTextColumn; + + public ListBinder(ListVector vector) { + this(vector, java.sql.Types.ARRAY); + } + + /** + * Init ListBinder and determine type of data vector. 
+ * + * @param vector corresponding data vector from arrow buffer for binding + * @param jdbcType parameter jdbc type + */ + public ListBinder(ListVector vector, int jdbcType) { + super(vector, jdbcType); + listReader = vector.getReader(); + Class dataVectorClass = vector.getDataVector().getClass(); + try { + arrayElementClass = dataVectorClass.getMethod("getObject", Integer.TYPE).getReturnType(); + } catch (NoSuchMethodException e) { + final String message = String.format("Issue to determine type for getObject method of data vector class %s ", + dataVectorClass.getName()); + throw new RuntimeException(message); + } + isTextColumn = arrayElementClass.isAssignableFrom(Text.class); + } + + @Override + public void bind(java.sql.PreparedStatement statement, int parameterIndex, int rowIndex)throws java.sql.SQLException { + listReader.setPosition(rowIndex); + ArrayList sourceArray = (ArrayList) listReader.readObject(); + Object array; + if (!isTextColumn) { + array = Array.newInstance(arrayElementClass, sourceArray.size()); + Arrays.setAll((Object[]) array, sourceArray::get); + } else { + array = new String[sourceArray.size()]; + Arrays.setAll((Object[]) array, idx -> sourceArray.get(idx) != null ? sourceArray.get(idx).toString() : null); + } + statement.setObject(parameterIndex, array); + } +} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/MapBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/MapBinder.java new file mode 100644 index 0000000000000..07391eb7cbfb4 --- /dev/null +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/MapBinder.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.binder; + +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.Types; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Objects; + +import org.apache.arrow.vector.complex.MapVector; +import org.apache.arrow.vector.complex.impl.UnionMapReader; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.util.JsonStringHashMap; + +/** + * A column binder for map of primitive values. + */ +public class MapBinder extends BaseColumnBinder { + + private UnionMapReader reader; + private final boolean isTextKey; + private final boolean isTextValue; + + public MapBinder(MapVector vector) { + this(vector, Types.VARCHAR); + } + + /** + * Init MapBinder and determine type of data vector. 
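+ * Key and value types are taken from the Map field's child Struct metadata; Utf8 keys and values are bound as Strings.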
+ * + * @param vector corresponding data vector from arrow buffer for binding + * @param jdbcType parameter jdbc type + */ + public MapBinder(MapVector vector, int jdbcType) { + super(vector, jdbcType); + reader = vector.getReader(); + List structField = Objects.requireNonNull(vector.getField()).getChildren(); + if (structField.size() != 1) { + throw new IllegalArgumentException("Expected Struct field metadata inside Map field"); + } + List keyValueFields = Objects.requireNonNull(structField.get(0)).getChildren(); + if (keyValueFields.size() != 2) { + throw new IllegalArgumentException("Expected two children fields " + + "inside nested Struct field in Map"); + } + ArrowType keyType = Objects.requireNonNull(keyValueFields.get(0)).getType(); + ArrowType valueType = Objects.requireNonNull(keyValueFields.get(1)).getType(); + isTextKey = ArrowType.Utf8.INSTANCE.equals(keyType); + isTextValue = ArrowType.Utf8.INSTANCE.equals(valueType); + } + + @Override + public void bind(PreparedStatement statement, + int parameterIndex, int rowIndex) throws SQLException { + reader.setPosition(rowIndex); + LinkedHashMap tags = new JsonStringHashMap<>(); + while (reader.next()) { + Object key = reader.key().readObject(); + Object value = reader.value().readObject(); + tags.put(isTextKey && key != null ? key.toString() : key, + isTextValue && value != null ? value.toString() : value); + } + switch (jdbcType) { + case Types.VARCHAR: + statement.setString(parameterIndex, tags.toString()); + break; + case Types.OTHER: + default: + statement.setObject(parameterIndex, tags); + } + } +} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java index 15c56d11ceeb4..645e343ffd0ba 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java @@ -27,6 +27,7 @@ import java.util.Calendar; import java.util.HashMap; import java.util.Map; +import java.util.TimeZone; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -95,6 +96,7 @@ protected static Table getTable(String ymlFilePath, @SuppressWarnings("rawtypes" */ @Before public void setUp() throws SQLException, ClassNotFoundException { + TimeZone.setDefault(TimeZone.getTimeZone("UTC")); String url = "jdbc:h2:mem:JdbcToArrowTest"; String driver = "org.h2.Driver"; Class.forName(driver); diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java index c8c043f2f0bcc..15b9ab0386159 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java @@ -30,9 +30,11 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.function.BiConsumer; import org.apache.arrow.adapter.jdbc.binder.ColumnBinder; +import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseLargeVariableWidthVector; @@ -67,12 +69,16 @@ import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; import 
org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.types.DateUnit; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.arrow.vector.util.JsonStringHashMap; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -385,6 +391,166 @@ void decimal256() throws SQLException { Arrays.asList(new BigDecimal("120.429"), new BigDecimal("-10590.123"), new BigDecimal("0.000"))); } + @Test + void listOfDouble() throws SQLException { + TriConsumer setValue = (listVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); + writer.setPosition(index); + writer.startList(); + Arrays.stream(values).forEach(doubleValue -> writer.float8().writeFloat8(doubleValue)); + writer.endList(); + listVector.setLastSet(index); + }; + List values = Arrays.asList(new Double[]{0.0, Math.PI}, new Double[]{1.1, -352346.2, 2355.6}, + new Double[]{-1024.3}, new Double[]{}); + testListType(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), setValue, ListVector::setNull, values); + } + + @Test + void listOfInt64() throws SQLException { + TriConsumer setValue = (listVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); + writer.setPosition(index); + writer.startList(); + Arrays.stream(values).forEach(longValue -> writer.bigInt().writeBigInt(longValue)); + writer.endList(); + listVector.setLastSet(index); + }; + List values = Arrays.asList(new Long[]{1L, 2L, 3L}, new Long[]{4L, 5L}, + new Long[]{512L, 1024L, 2048L, 4096L}, new Long[]{}); + testListType((ArrowType) new ArrowType.Int(64, true), setValue, ListVector::setNull, values); + } + + @Test + void listOfInt32() throws SQLException { + TriConsumer setValue = (listVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); + writer.setPosition(index); + writer.startList(); + Arrays.stream(values).forEach(integerValue -> writer.integer().writeInt(integerValue)); + writer.endList(); + listVector.setLastSet(index); + }; + List values = Arrays.asList(new Integer[]{1, 2, 3}, new Integer[]{4, 5}, + new Integer[]{512, 1024, 2048, 4096}, new Integer[]{}); + testListType((ArrowType) new ArrowType.Int(32, true), setValue, ListVector::setNull, values); + } + + @Test + void listOfBoolean() throws SQLException { + TriConsumer setValue = (listVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); + writer.setPosition(index); + writer.startList(); + Arrays.stream(values).forEach(booleanValue -> writer.bit().writeBit(booleanValue ? 
1 : 0)); + writer.endList(); + listVector.setLastSet(index); + }; + List values = Arrays.asList(new Boolean[]{true, false}, + new Boolean[]{false, false}, new Boolean[]{true, true, false, true}, new Boolean[]{}); + testListType((ArrowType) new ArrowType.Bool(), setValue, ListVector::setNull, values); + } + + @Test + void listOfString() throws SQLException { + TriConsumer setValue = (listVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); + writer.setPosition(index); + writer.startList(); + Arrays.stream(values).forEach(stringValue -> { + if (stringValue != null) { + byte[] stringValueBytes = stringValue.getBytes(StandardCharsets.UTF_8); + try (ArrowBuf stringBuffer = allocator.buffer(stringValueBytes.length)) { + stringBuffer.writeBytes(stringValueBytes); + writer.varChar().writeVarChar(0, stringValueBytes.length, stringBuffer); + } + } else { + writer.varChar().writeNull(); + } + }); + writer.endList(); + listVector.setLastSet(index); + }; + List values = Arrays.asList(new String[]{"aaaa", "b1"}, + new String[]{"c", null, "d"}, new String[]{"e", "f", "g", "h"}, new String[]{}); + testListType((ArrowType) new ArrowType.Utf8(), setValue, ListVector::setNull, values); + } + + @Test + void mapOfString() throws SQLException { + TriConsumer> setValue = (mapVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionMapWriter mapWriter = mapVector.getWriter(); + mapWriter.setPosition(index); + mapWriter.startMap(); + values.entrySet().forEach(mapValue -> { + if (mapValue != null) { + byte[] keyBytes = mapValue.getKey().getBytes(StandardCharsets.UTF_8); + byte[] valueBytes = mapValue.getValue().getBytes(StandardCharsets.UTF_8); + try ( + ArrowBuf keyBuf = allocator.buffer(keyBytes.length); + ArrowBuf valueBuf = allocator.buffer(valueBytes.length); + ) { + mapWriter.startEntry(); + keyBuf.writeBytes(keyBytes); + valueBuf.writeBytes(valueBytes); + mapWriter.key().varChar().writeVarChar(0, keyBytes.length, keyBuf); + mapWriter.value().varChar().writeVarChar(0, valueBytes.length, valueBuf); + mapWriter.endEntry(); + } + } else { + mapWriter.writeNull(); + } + }); + mapWriter.endMap(); + }; + + JsonStringHashMap value1 = new JsonStringHashMap(); + value1.put("a", "b"); + value1.put("c", "d"); + JsonStringHashMap value2 = new JsonStringHashMap(); + value2.put("d", "e"); + value2.put("f", "g"); + value2.put("k", "l"); + JsonStringHashMap value3 = new JsonStringHashMap(); + value3.put("y", "z"); + value3.put("arrow", "cool"); + List> values = Arrays.asList(value1, value2, value3, Collections.emptyMap()); + testMapType(new ArrowType.Map(true), setValue, MapVector::setNull, values, new ArrowType.Utf8()); + } + + @Test + void mapOfInteger() throws SQLException { + TriConsumer> setValue = (mapVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionMapWriter mapWriter = mapVector.getWriter(); + mapWriter.setPosition(index); + mapWriter.startMap(); + values.entrySet().forEach(mapValue -> { + if (mapValue != null) { + mapWriter.startEntry(); + mapWriter.key().integer().writeInt(mapValue.getKey()); + mapWriter.value().integer().writeInt(mapValue.getValue()); + mapWriter.endEntry(); + } else { + mapWriter.writeNull(); + } + }); + mapWriter.endMap(); + }; + + JsonStringHashMap value1 = new JsonStringHashMap(); + value1.put(1, 2); + value1.put(3, 4); + JsonStringHashMap value2 = new JsonStringHashMap(); + value2.put(5, 6); + value2.put(7, 8); + value2.put(9, 1024); + JsonStringHashMap value3 = new JsonStringHashMap(); + 
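+ // value3 covers the extreme ends of the int range; an empty map row is appended below via Collections.emptyMap().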
value3.put(Integer.MIN_VALUE, Integer.MAX_VALUE); + value3.put(0, 4096); + List> values = Arrays.asList(value1, value2, value3, Collections.emptyMap()); + testMapType(new ArrowType.Map(true), setValue, MapVector::setNull, values, new ArrowType.Int(32, true)); + } + @FunctionalInterface interface TriConsumer { void accept(T value1, U value2, V value3); @@ -483,4 +649,211 @@ void testSimpleType(ArrowType arrowType, int jdbcType assertThat(binder.next()).isFalse(); } } + + void testListType(ArrowType arrowType, TriConsumer setValue, + BiConsumer setNull, List values) throws SQLException { + int jdbcType = Types.ARRAY; + Schema schema = new Schema(Collections.singletonList(new Field("field", FieldType.nullable( + new ArrowType.List()), Collections.singletonList( + new Field("element", FieldType.notNullable(arrowType), null) + )))); + try (final MockPreparedStatement statement = new MockPreparedStatement(); + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + final JdbcParameterBinder binder = + JdbcParameterBinder.builder(statement, root).bindAll().build(); + assertThat(binder.next()).isFalse(); + + @SuppressWarnings("unchecked") + final V vector = (V) root.getVector(0); + final ColumnBinder columnBinder = ColumnBinder.forVector(vector); + assertThat(columnBinder.getJdbcType()).isEqualTo(jdbcType); + + setValue.accept(vector, 0, values.get(0)); + setValue.accept(vector, 1, values.get(1)); + setNull.accept(vector, 2); + root.setRowCount(3); + + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(1)); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isNull(); + assertThat(statement.getParamType(1)).isEqualTo(jdbcType); + assertThat(binder.next()).isFalse(); + + binder.reset(); + + setNull.accept(vector, 0); + setValue.accept(vector, 1, values.get(3)); + setValue.accept(vector, 2, values.get(0)); + setValue.accept(vector, 3, values.get(2)); + setValue.accept(vector, 4, values.get(1)); + root.setRowCount(5); + + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isNull(); + assertThat(statement.getParamType(1)).isEqualTo(jdbcType); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(3)); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(2)); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(1)); + assertThat(binder.next()).isFalse(); + } + + // Non-nullable (since some types have a specialized binder) + schema = new Schema(Collections.singletonList(new Field("field", FieldType.notNullable( + new ArrowType.List()), Collections.singletonList( + new Field("element", FieldType.notNullable(arrowType), null) + )))); + try (final MockPreparedStatement statement = new MockPreparedStatement(); + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + final JdbcParameterBinder binder = + JdbcParameterBinder.builder(statement, root).bindAll().build(); + assertThat(binder.next()).isFalse(); + + @SuppressWarnings("unchecked") + final V vector = (V) root.getVector(0); + setValue.accept(vector, 0, values.get(0)); + setValue.accept(vector, 1, values.get(1)); + root.setRowCount(2); + + 
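+ // Same bind/advance checks as above, now against the non-nullable List field (some types have a specialized binder).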
assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(1)); + assertThat(binder.next()).isFalse(); + + binder.reset(); + + setValue.accept(vector, 0, values.get(0)); + setValue.accept(vector, 1, values.get(2)); + setValue.accept(vector, 2, values.get(0)); + setValue.accept(vector, 3, values.get(2)); + setValue.accept(vector, 4, values.get(1)); + root.setRowCount(5); + + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(2)); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(2)); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(1)); + assertThat(binder.next()).isFalse(); + } + } + + void testMapType(ArrowType arrowType, TriConsumer setValue, + BiConsumer setNull, List values, + ArrowType elementType) throws SQLException { + int jdbcType = Types.VARCHAR; + FieldType keyType = new FieldType(false, elementType, null, null); + FieldType mapType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); + Schema schema = new Schema(Collections.singletonList(new Field("field", FieldType.nullable(arrowType), + Collections.singletonList(new Field(MapVector.KEY_NAME, mapType, + Arrays.asList(new Field(MapVector.KEY_NAME, keyType, null), + new Field(MapVector.VALUE_NAME, keyType, null))))))); + try (final MockPreparedStatement statement = new MockPreparedStatement(); + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + final JdbcParameterBinder binder = + JdbcParameterBinder.builder(statement, root).bindAll().build(); + assertThat(binder.next()).isFalse(); + + @SuppressWarnings("unchecked") + final V vector = (V) root.getVector(0); + final ColumnBinder columnBinder = ColumnBinder.forVector(vector); + assertThat(columnBinder.getJdbcType()).isEqualTo(jdbcType); + + setValue.accept(vector, 0, values.get(0)); + setValue.accept(vector, 1, values.get(1)); + setNull.accept(vector, 2); + root.setRowCount(3); + + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(0).toString()); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(1).toString()); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isNull(); + assertThat(statement.getParamType(1)).isEqualTo(jdbcType); + assertThat(binder.next()).isFalse(); + + binder.reset(); + + setNull.accept(vector, 0); + setValue.accept(vector, 1, values.get(3)); + setValue.accept(vector, 2, values.get(0)); + setValue.accept(vector, 3, values.get(2)); + setValue.accept(vector, 4, values.get(1)); + root.setRowCount(5); + + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isNull(); + assertThat(statement.getParamType(1)).isEqualTo(jdbcType); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(3).toString()); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(0).toString()); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(2).toString()); + 
assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(1).toString()); + assertThat(binder.next()).isFalse(); + } + + // Non-nullable (since some types have a specialized binder) + schema = new Schema(Collections.singletonList(new Field("field", FieldType.notNullable(arrowType), + Collections.singletonList(new Field(MapVector.KEY_NAME, mapType, + Arrays.asList(new Field(MapVector.KEY_NAME, keyType, null), + new Field(MapVector.VALUE_NAME, keyType, null))))))); + try (final MockPreparedStatement statement = new MockPreparedStatement(); + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + @SuppressWarnings("unchecked") + final V vector = (V) root.getVector(0); + + final JdbcParameterBinder binder = + JdbcParameterBinder.builder(statement, root).bind(1, + new org.apache.arrow.adapter.jdbc.binder.MapBinder((MapVector) vector, Types.OTHER)).build(); + assertThat(binder.next()).isFalse(); + + setValue.accept(vector, 0, values.get(0)); + setValue.accept(vector, 1, values.get(1)); + root.setRowCount(2); + + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(1)); + assertThat(binder.next()).isFalse(); + + binder.reset(); + + setValue.accept(vector, 0, values.get(0)); + setValue.accept(vector, 1, values.get(2)); + setValue.accept(vector, 2, values.get(0)); + setValue.accept(vector, 3, values.get(2)); + setValue.accept(vector, 4, values.get(1)); + root.setRowCount(5); + + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(2)); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(2)); + assertThat(binder.next()).isTrue(); + assertThat(statement.getParamValue(1)).isEqualTo(values.get(1)); + assertThat(binder.next()).isFalse(); + } + } } diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml index 521686274675f..b21adeeda5b9b 100644 --- a/java/adapter/orc/pom.xml +++ b/java/adapter/orc/pom.xml @@ -35,7 +35,7 @@ org.apache.orc orc-core - 1.7.5 + 1.7.6 test @@ -104,7 +104,7 @@ org.apache.arrow arrow-java-root - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT ../../pom.xml diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml index d4281f6464d3b..8d75820dfe7df 100644 --- a/java/algorithm/pom.xml +++ b/java/algorithm/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT arrow-algorithm Arrow Algorithms diff --git a/java/c/pom.xml b/java/c/pom.xml index 6d0632ea16584..28d2c94058b7f 100644 --- a/java/c/pom.xml +++ b/java/c/pom.xml @@ -13,7 +13,7 @@ arrow-java-root org.apache.arrow - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT 4.0.0 diff --git a/java/compression/pom.xml b/java/compression/pom.xml index a394ac63727e1..29c8c3ba74aff 100644 --- a/java/compression/pom.xml +++ b/java/compression/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT arrow-compression Arrow Compression diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml index 2727b1e2739d9..9eadf896888a5 100644 --- a/java/dataset/pom.xml +++ b/java/dataset/pom.xml @@ -15,7 +15,7 @@ arrow-java-root org.apache.arrow - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT 4.0.0 diff 
--git a/java/dataset/src/main/cpp/jni_wrapper.cc b/java/dataset/src/main/cpp/jni_wrapper.cc index e96dfb8aed7e4..d088163903457 100644 --- a/java/dataset/src/main/cpp/jni_wrapper.cc +++ b/java/dataset/src/main/cpp/jni_wrapper.cc @@ -89,6 +89,8 @@ arrow::Result> GetFileFormat( switch (file_format_id) { case 0: return std::make_shared(); + case 1: + return std::make_shared(); default: std::string error_message = "illegal file format id: " + std::to_string(file_format_id); diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileFormat.java b/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileFormat.java index 107fc2f71d2aa..343e458ce23a9 100644 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileFormat.java +++ b/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileFormat.java @@ -22,6 +22,7 @@ */ public enum FileFormat { PARQUET(0), + ARROW_IPC(1), NONE(-1); private final int id; diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java b/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java index 92610b1145c74..2fd8a19bac1f1 100644 --- a/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java +++ b/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java @@ -23,6 +23,7 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; @@ -46,11 +47,15 @@ import org.apache.arrow.dataset.scanner.ScanOptions; import org.apache.arrow.util.AutoCloseables; import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.VectorLoader; import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.ipc.ArrowFileWriter; import org.apache.arrow.vector.ipc.ArrowReader; import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.GenericRecordBuilder; @@ -316,6 +321,42 @@ public void testErrorThrownWhenIterateOnIteratorAfterTaskClose() throws Exceptio AutoCloseables.close(factory); } + @Test + public void testBaseArrowIpcRead() throws Exception { + File dataFile = TMP.newFile(); + Schema sourceSchema = new Schema(Collections.singletonList(Field.nullable("ints", new ArrowType.Int(32, true)))); + try (VectorSchemaRoot root = VectorSchemaRoot.create(sourceSchema, rootAllocator()); + FileOutputStream sink = new FileOutputStream(dataFile); + ArrowFileWriter writer = new ArrowFileWriter(root, /*dictionaryProvider=*/null, sink.getChannel())) { + IntVector ints = (IntVector) root.getVector(0); + ints.setSafe(0, 0); + ints.setSafe(1, 1024); + ints.setSafe(2, Integer.MAX_VALUE); + root.setRowCount(3); + writer.start(); + writer.writeBatch(); + writer.end(); + } + + String arrowDataURI = dataFile.toURI().toString(); + FileSystemDatasetFactory factory = new FileSystemDatasetFactory(rootAllocator(), NativeMemoryPool.getDefault(), + FileFormat.ARROW_IPC, arrowDataURI); + ScanOptions options = new ScanOptions(100); + Schema schema = inferResultSchemaFromFactory(factory, options); + List datum = collectResultFromFactory(factory, options); + + 
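+ // Only one record batch was written to the IPC file above, so a single batch is expected back.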
assertSingleTaskProduced(factory, options); + assertEquals(1, datum.size()); + assertEquals(1, schema.getFields().size()); + assertEquals("ints", schema.getFields().get(0).getName()); + + String expectedJsonUnordered = String.format("[[0],[1024],[%d]]", Integer.MAX_VALUE); + checkParquetReadResult(schema, expectedJsonUnordered, datum); + + AutoCloseables.close(datum); + AutoCloseables.close(factory); + } + private void checkParquetReadResult(Schema schema, String expectedJson, List actual) throws IOException { final ObjectMapper json = new ObjectMapper(); diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml index 28325e9bffba3..a7d600b62ecfd 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -14,7 +14,7 @@ arrow-flight org.apache.arrow - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT ../pom.xml diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/AddWritableBuffer.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/AddWritableBuffer.java index 26e0274fa0efe..4a99ab22842e2 100644 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/AddWritableBuffer.java +++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/AddWritableBuffer.java @@ -72,7 +72,7 @@ public class AddWritableBuffer { tmpBufChainOut = tmpBufChainOut2; } catch (Exception ex) { - ex.printStackTrace(); + new RuntimeException("Failed to initialize AddWritableBuffer, falling back to slow path", ex).printStackTrace(); } bufConstruct = tmpConstruct; diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/GetReadableBuffer.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/GetReadableBuffer.java index 5f8a71576c3ab..82cfd7f39c91c 100644 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/GetReadableBuffer.java +++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/GetReadableBuffer.java @@ -51,7 +51,7 @@ public class GetReadableBuffer { tmpField = f; tmpClazz = clazz; } catch (Exception e) { - e.printStackTrace(); + new RuntimeException("Failed to initialize GetReadableBuffer, falling back to slow path", e).printStackTrace(); } READABLE_BUFFER = tmpField; BUFFER_INPUT_STREAM = tmpClazz; diff --git a/java/flight/flight-grpc/pom.xml b/java/flight/flight-grpc/pom.xml index 5c113be8615d2..07b258f1b765b 100644 --- a/java/flight/flight-grpc/pom.xml +++ b/java/flight/flight-grpc/pom.xml @@ -13,7 +13,7 @@ arrow-flight org.apache.arrow - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT ../pom.xml 4.0.0 diff --git a/java/flight/flight-integration-tests/pom.xml b/java/flight/flight-integration-tests/pom.xml index e676be979dbb3..eeba667236698 100644 --- a/java/flight/flight-integration-tests/pom.xml +++ b/java/flight/flight-integration-tests/pom.xml @@ -15,7 +15,7 @@ arrow-flight org.apache.arrow - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT ../pom.xml diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/FlightSqlScenario.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/FlightSqlScenario.java index cf17349064cb2..19c1378cfe6c5 100644 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/FlightSqlScenario.java +++ b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/FlightSqlScenario.java @@ -26,6 +26,7 @@ import org.apache.arrow.flight.FlightServer; import 
org.apache.arrow.flight.FlightStream; import org.apache.arrow.flight.Location; +import org.apache.arrow.flight.SchemaResult; import org.apache.arrow.flight.Ticket; import org.apache.arrow.flight.sql.FlightSqlClient; import org.apache.arrow.flight.sql.FlightSqlProducer; @@ -72,32 +73,52 @@ private void validateMetadataRetrieval(FlightSqlClient sqlClient) throws Excepti validate(FlightSqlProducer.Schemas.GET_CATALOGS_SCHEMA, sqlClient.getCatalogs(options), sqlClient); + validateSchema(FlightSqlProducer.Schemas.GET_CATALOGS_SCHEMA, sqlClient.getCatalogsSchema(options)); + validate(FlightSqlProducer.Schemas.GET_SCHEMAS_SCHEMA, sqlClient.getSchemas("catalog", "db_schema_filter_pattern", options), sqlClient); + validateSchema(FlightSqlProducer.Schemas.GET_SCHEMAS_SCHEMA, sqlClient.getSchemasSchema()); + validate(FlightSqlProducer.Schemas.GET_TABLES_SCHEMA, sqlClient.getTables("catalog", "db_schema_filter_pattern", "table_filter_pattern", Arrays.asList("table", "view"), true, options), sqlClient); - validate(FlightSqlProducer.Schemas.GET_TABLE_TYPES_SCHEMA, sqlClient.getTableTypes(options), - sqlClient); + validateSchema(FlightSqlProducer.Schemas.GET_TABLES_SCHEMA, + sqlClient.getTablesSchema(/*includeSchema*/true, options)); + validateSchema(FlightSqlProducer.Schemas.GET_TABLES_SCHEMA_NO_SCHEMA, + sqlClient.getTablesSchema(/*includeSchema*/false, options)); + + validate(FlightSqlProducer.Schemas.GET_TABLE_TYPES_SCHEMA, sqlClient.getTableTypes(options), sqlClient); + validateSchema(FlightSqlProducer.Schemas.GET_TABLE_TYPES_SCHEMA, sqlClient.getTableTypesSchema(options)); + validate(FlightSqlProducer.Schemas.GET_PRIMARY_KEYS_SCHEMA, sqlClient.getPrimaryKeys(TableRef.of("catalog", "db_schema", "table"), options), sqlClient); + validateSchema(FlightSqlProducer.Schemas.GET_PRIMARY_KEYS_SCHEMA, sqlClient.getPrimaryKeysSchema(options)); + validate(FlightSqlProducer.Schemas.GET_EXPORTED_KEYS_SCHEMA, sqlClient.getExportedKeys(TableRef.of("catalog", "db_schema", "table"), options), sqlClient); + validateSchema(FlightSqlProducer.Schemas.GET_EXPORTED_KEYS_SCHEMA, sqlClient.getExportedKeysSchema(options)); + validate(FlightSqlProducer.Schemas.GET_IMPORTED_KEYS_SCHEMA, sqlClient.getImportedKeys(TableRef.of("catalog", "db_schema", "table"), options), sqlClient); + validateSchema(FlightSqlProducer.Schemas.GET_IMPORTED_KEYS_SCHEMA, sqlClient.getImportedKeysSchema(options)); + validate(FlightSqlProducer.Schemas.GET_CROSS_REFERENCE_SCHEMA, sqlClient.getCrossReference(TableRef.of("pk_catalog", "pk_db_schema", "pk_table"), TableRef.of("fk_catalog", "fk_db_schema", "fk_table"), options), sqlClient); - validate(FlightSqlProducer.Schemas.GET_TYPE_INFO_SCHEMA, - sqlClient.getXdbcTypeInfo(options), sqlClient); + validateSchema(FlightSqlProducer.Schemas.GET_CROSS_REFERENCE_SCHEMA, sqlClient.getCrossReferenceSchema(options)); + + validate(FlightSqlProducer.Schemas.GET_TYPE_INFO_SCHEMA, sqlClient.getXdbcTypeInfo(options), sqlClient); + validateSchema(FlightSqlProducer.Schemas.GET_TYPE_INFO_SCHEMA, sqlClient.getXdbcTypeInfoSchema(options)); + validate(FlightSqlProducer.Schemas.GET_SQL_INFO_SCHEMA, sqlClient.getSqlInfo(new FlightSql.SqlInfo[] {FlightSql.SqlInfo.FLIGHT_SQL_SERVER_NAME, FlightSql.SqlInfo.FLIGHT_SQL_SERVER_READ_ONLY}, options), sqlClient); + validateSchema(FlightSqlProducer.Schemas.GET_SQL_INFO_SCHEMA, sqlClient.getSqlInfoSchema(options)); } private void validateStatementExecution(FlightSqlClient sqlClient) throws Exception { @@ -105,6 +126,8 @@ private void validateStatementExecution(FlightSqlClient sqlClient) 
throws Except validate(FlightSqlScenarioProducer.getQuerySchema(), sqlClient.execute("SELECT STATEMENT", options), sqlClient); + validateSchema(FlightSqlScenarioProducer.getQuerySchema(), + sqlClient.getExecuteSchema("SELECT STATEMENT", options)); IntegrationAssertions.assertEquals(sqlClient.executeUpdate("UPDATE STATEMENT", options), UPDATE_STATEMENT_EXPECTED_ROWS); @@ -122,6 +145,7 @@ private void validatePreparedStatementExecution(FlightSqlClient sqlClient, validate(FlightSqlScenarioProducer.getQuerySchema(), preparedStatement.execute(options), sqlClient); + validateSchema(FlightSqlScenarioProducer.getQuerySchema(), preparedStatement.fetchSchema()); } try (FlightSqlClient.PreparedStatement preparedStatement = sqlClient.prepare( @@ -139,4 +163,8 @@ private void validate(Schema expectedSchema, FlightInfo flightInfo, IntegrationAssertions.assertEquals(expectedSchema, actualSchema); } } + + private void validateSchema(Schema expected, SchemaResult actual) { + IntegrationAssertions.assertEquals(expected, actual.getSchema()); + } } diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/FlightSqlScenarioProducer.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/FlightSqlScenarioProducer.java index 7db99187c466e..33d62b650e176 100644 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/FlightSqlScenarioProducer.java +++ b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/FlightSqlScenarioProducer.java @@ -125,9 +125,18 @@ public FlightInfo getFlightInfoPreparedStatement(FlightSql.CommandPreparedStatem return getFlightInfoForSchema(command, descriptor, getQuerySchema()); } + @Override + public SchemaResult getSchemaPreparedStatement(FlightSql.CommandPreparedStatementQuery command, CallContext context, + FlightDescriptor descriptor) { + IntegrationAssertions.assertEquals(command.getPreparedStatementHandle().toStringUtf8(), + "SELECT PREPARED STATEMENT HANDLE"); + return new SchemaResult(getQuerySchema()); + } + @Override public SchemaResult getSchemaStatement(FlightSql.CommandStatementQuery command, CallContext context, FlightDescriptor descriptor) { + IntegrationAssertions.assertEquals(command.getQuery(), "SELECT STATEMENT"); return new SchemaResult(getQuerySchema()); } diff --git a/java/flight/flight-integration-tests/src/main/test/java/org/apache/arrow/flight/integration/tests/IntegrationTest.java b/java/flight/flight-integration-tests/src/main/test/java/org/apache/arrow/flight/integration/tests/IntegrationTest.java new file mode 100644 index 0000000000000..dfb9a810857ba --- /dev/null +++ b/java/flight/flight-integration-tests/src/main/test/java/org/apache/arrow/flight/integration/tests/IntegrationTest.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.flight.integration.tests; + +import org.apache.arrow.flight.FlightClient; +import org.apache.arrow.flight.FlightServer; +import org.apache.arrow.flight.Location; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.junit.jupiter.api.Test; + +/** + * Run the integration test scenarios in-process. + */ +class IntegrationTest { + @Test + void authBasicProto() throws Exception { + testScenario("auth:basic_proto"); + } + + @Test + void middleware() throws Exception { + testScenario("middleware"); + } + + @Test + void flightSql() throws Exception { + testScenario("flight_sql"); + } + + void testScenario(String scenarioName) throws Exception { + try (final BufferAllocator allocator = new RootAllocator()) { + final FlightServer.Builder builder = FlightServer.builder() + .allocator(allocator) + .location(Location.forGrpcInsecure("0.0.0.0", 0)); + final Scenario scenario = Scenarios.getScenario(scenarioName); + scenario.buildServer(builder); + builder.producer(scenario.producer(allocator, Location.forGrpcInsecure("0.0.0.0", 0))); + + try (final FlightServer server = builder.build()) { + server.start(); + + final Location location = Location.forGrpcInsecure("localhost", server.getPort()); + try (final FlightClient client = FlightClient.builder(allocator, location).build()) { + scenario.client(allocator, location, client); + } + } + } + } +} diff --git a/java/flight/flight-jdbc-driver/jdbc-spotbugs-exclude.xml b/java/flight/flight-sql-jdbc-driver/jdbc-spotbugs-exclude.xml similarity index 100% rename from java/flight/flight-jdbc-driver/jdbc-spotbugs-exclude.xml rename to java/flight/flight-sql-jdbc-driver/jdbc-spotbugs-exclude.xml diff --git a/java/flight/flight-jdbc-driver/pom.xml b/java/flight/flight-sql-jdbc-driver/pom.xml similarity index 80% rename from java/flight/flight-jdbc-driver/pom.xml rename to java/flight/flight-sql-jdbc-driver/pom.xml index b127998cc8b74..b8a49165adb4a 100644 --- a/java/flight/flight-jdbc-driver/pom.xml +++ b/java/flight/flight-sql-jdbc-driver/pom.xml @@ -16,16 +16,16 @@ arrow-flight org.apache.arrow - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT ../pom.xml 4.0.0 - flight-jdbc-driver - Arrow Flight JDBC Driver - (Contrib/Experimental)A library for querying data using a JDBC driver for Arrow Flight. + flight-sql-jdbc-driver + Arrow Flight SQL JDBC Driver + (Contrib/Experimental) A JDBC driver based on Arrow Flight SQL. 
jar - http://maven.apache.org + https://arrow.apache.org ${project.parent.groupId}:${project.parent.artifactId} @@ -41,17 +41,13 @@ flight-core ${project.version} - - io.netty - netty-transport-native-unix-common - - - io.netty - netty-transport-native-kqueue + + io.netty + netty-transport-native-kqueue - - io.netty - netty-transport-native-epoll + + io.netty + netty-transport-native-epoll @@ -117,14 +113,14 @@ org.mockito mockito-core - 3.9.0 + 3.12.4 test org.mockito mockito-inline - 3.9.0 + 3.12.4 test @@ -246,6 +242,7 @@ org.codehaus.mojo properties-maven-plugin + 1.1.0 write-project-properties-to-file @@ -262,7 +259,6 @@ org.jacoco jacoco-maven-plugin - 0.8.2 @@ -302,6 +298,11 @@ CLASS + + org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl + + org.apache.arrow.driver.jdbc.utils.UrlParser + BRANCH @@ -315,23 +316,60 @@ - - org.apache.maven.plugins - maven-surefire-plugin - 3.0.0-M5 - - - ${surefireArgLine} - - **/IT*.java - - false - - ${project.basedir}/../../../testing/data - - - + + + jdk8 + + 1.8 + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + ${surefireArgLine} + + **/IT*.java + + false + + ${project.basedir}/../../../testing/data + + + + + + + + + jdk9+ + + [9,] + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + ${surefireArgLine} --add-opens=java.base/java.nio=ALL-UNNAMED + + **/IT*.java + + false + + ${project.basedir}/../../../testing/data + + + + + + + diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadata.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadata.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadata.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadata.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightConnection.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightConnection.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightConnection.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightConnection.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightInfoStatement.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightInfoStatement.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightInfoStatement.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightInfoStatement.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcArray.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcArray.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcArray.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcArray.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionPoolDataSource.java 
b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionPoolDataSource.java similarity index 96% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionPoolDataSource.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionPoolDataSource.java index 2f25f82b3e8ed..46a1d3ff87c34 100644 --- a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionPoolDataSource.java +++ b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionPoolDataSource.java @@ -96,7 +96,9 @@ private ArrowFlightJdbcPooledConnection createPooledConnection( public void connectionClosed(ConnectionEvent connectionEvent) { final ArrowFlightJdbcPooledConnection pooledConnection = (ArrowFlightJdbcPooledConnection) connectionEvent.getSource(); - pool.get(pooledConnection.getProperties()).add(pooledConnection); + Queue connectionQueue = + pool.get(pooledConnection.getProperties()); + connectionQueue.add(pooledConnection); } @Override diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcCursor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcCursor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcCursor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcCursor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDataSource.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDataSource.java similarity index 98% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDataSource.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDataSource.java index 6e60bd95d5906..a57eeaa830492 100644 --- a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDataSource.java +++ b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDataSource.java @@ -71,7 +71,7 @@ protected final Properties getProperties(final String username, final String pas if (password != null) { newProperties.replace(ArrowFlightConnectionProperty.PASSWORD.camelName(), password); } - return newProperties; + return ArrowFlightJdbcDriver.lowerCasePropertyKeys(newProperties); } /** diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriver.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriver.java similarity index 84% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriver.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriver.java index 216c37fb5d32d..a72fbd3a4d592 100644 --- a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriver.java +++ b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriver.java @@ -36,6 +36,7 @@ import org.apache.arrow.flight.FlightRuntimeException; import org.apache.arrow.memory.RootAllocator; import 
org.apache.arrow.util.Preconditions; +import org.apache.arrow.util.VisibleForTesting; import org.apache.calcite.avatica.AvaticaConnection; import org.apache.calcite.avatica.DriverVersion; import org.apache.calcite.avatica.Meta; @@ -45,9 +46,9 @@ * JDBC driver for querying data from an Apache Arrow Flight server. */ public class ArrowFlightJdbcDriver extends UnregisteredDriver { - - private static final String CONNECT_STRING_PREFIX = "jdbc:arrow-flight://"; - private static final String CONNECTION_STRING_EXPECTED = "jdbc:arrow-flight://[host][:port][?param1=value&...]"; + private static final String CONNECT_STRING_PREFIX = "jdbc:arrow-flight-sql://"; + private static final String CONNECT_STRING_PREFIX_DEPRECATED = "jdbc:arrow-flight://"; + private static final String CONNECTION_STRING_EXPECTED = "jdbc:arrow-flight-sql://[host][:port][?param1=value&...]"; private static DriverVersion version; static { @@ -80,7 +81,7 @@ public ArrowFlightConnection connect(final String url, final Properties info) this, factory, url, - properties, + lowerCasePropertyKeys(properties), new RootAllocator(Long.MAX_VALUE)); } catch (final FlightRuntimeException e) { throw new SQLException("Failed to connect.", e); @@ -147,7 +148,8 @@ protected String getConnectStringPrefix() { @Override public boolean acceptsURL(final String url) { - return Preconditions.checkNotNull(url).startsWith(CONNECT_STRING_PREFIX); + Preconditions.checkNotNull(url); + return url.startsWith(CONNECT_STRING_PREFIX) || url.startsWith(CONNECT_STRING_PREFIX_DEPRECATED); } /** @@ -155,7 +157,7 @@ public boolean acceptsURL(final String url) { * arguments after the {@link #CONNECT_STRING_PREFIX}. *

* This method gets the args if the provided URL follows this pattern: - * {@code jdbc:arrow-flight://:[/?key1=val1&key2=val2&(...)]} + * {@code jdbc:arrow-flight-sql://:[/?key1=val1&key2=val2&(...)]} * *

* @@ -168,7 +170,7 @@ public boolean acceptsURL(final String url) { * * * * @@ -200,7 +202,8 @@ public boolean acceptsURL(final String url) { * @return the parsed arguments. * @throws SQLException If an error occurs while trying to parse the URL. */ - private Map getUrlsArgs(String url) + @VisibleForTesting // ArrowFlightJdbcDriverTest + Map getUrlsArgs(String url) throws SQLException { /* @@ -212,7 +215,7 @@ private Map getUrlsArgs(String url) * ===== * * Keep in mind that the URL must ALWAYS follow the pattern: - * "jdbc:arrow-flight://:[/?param1=value1¶m2=value2&(...)]." + * "jdbc:arrow-flight-sql://:[/?param1=value1¶m2=value2&(...)]." * */ @@ -235,20 +238,32 @@ private Map getUrlsArgs(String url) throw new SQLException("Malformed/invalid URL!", e); } - if (!Objects.equals(uri.getScheme(), "arrow-flight")) { + if (!Objects.equals(uri.getScheme(), "arrow-flight") && + !Objects.equals(uri.getScheme(), "arrow-flight-sql")) { throw new SQLException("URL Scheme must be 'arrow-flight'. Expected format: " + CONNECTION_STRING_EXPECTED); } - + if (uri.getHost() == null) { + throw new SQLException("URL must have a host. Expected format: " + CONNECTION_STRING_EXPECTED); + } else if (uri.getPort() < 0) { + throw new SQLException("URL must have a port. Expected format: " + CONNECTION_STRING_EXPECTED); + } resultMap.put(ArrowFlightConnectionProperty.HOST.camelName(), uri.getHost()); // host resultMap.put(ArrowFlightConnectionProperty.PORT.camelName(), uri.getPort()); // port final String extraParams = uri.getRawQuery(); // optional params - - final Map keyValuePairs = UrlParser.parse(extraParams, "&"); - resultMap.putAll(keyValuePairs); + if (extraParams != null) { + final Map keyValuePairs = UrlParser.parse(extraParams, "&"); + resultMap.putAll(keyValuePairs); + } return resultMap; } + + static Properties lowerCasePropertyKeys(final Properties properties) { + final Properties resultProperty = new Properties(); + properties.forEach((k, v) -> resultProperty.put(k.toString().toLowerCase(), v)); + return resultProperty; + } } diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFactory.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFactory.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFactory.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFactory.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFlightStreamResultSet.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFlightStreamResultSet.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFlightStreamResultSet.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFlightStreamResultSet.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcPooledConnection.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcPooledConnection.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcPooledConnection.java rename to 
java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcPooledConnection.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcTime.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcTime.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcTime.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcTime.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcVectorSchemaRootResultSet.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcVectorSchemaRootResultSet.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcVectorSchemaRootResultSet.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcVectorSchemaRootResultSet.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightMetaImpl.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightMetaImpl.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightMetaImpl.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightMetaImpl.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightPreparedStatement.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightPreparedStatement.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightPreparedStatement.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightPreparedStatement.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightStatement.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightStatement.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightStatement.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightStatement.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorFactory.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorFactory.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorFactory.java rename to 
java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorFactory.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/ArrowFlightJdbcNullVectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/ArrowFlightJdbcNullVectorAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/ArrowFlightJdbcNullVectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/ArrowFlightJdbcNullVectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/binary/ArrowFlightJdbcBinaryVectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/binary/ArrowFlightJdbcBinaryVectorAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/binary/ArrowFlightJdbcBinaryVectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/binary/ArrowFlightJdbcBinaryVectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorGetter.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorGetter.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorGetter.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorGetter.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDurationVectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDurationVectorAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDurationVectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDurationVectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcIntervalVectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcIntervalVectorAccessor.java similarity index 100% rename from 
java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcIntervalVectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcIntervalVectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorGetter.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorGetter.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorGetter.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorGetter.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorGetter.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorGetter.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorGetter.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorGetter.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcListVectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcListVectorAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcListVectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcListVectorAccessor.java diff --git 
a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcUnionVectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcUnionVectorAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcUnionVectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcUnionVectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcDenseUnionVectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcDenseUnionVectorAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcDenseUnionVectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcDenseUnionVectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcFixedSizeListVectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcFixedSizeListVectorAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcFixedSizeListVectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcFixedSizeListVectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcLargeListVectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcLargeListVectorAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcLargeListVectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcLargeListVectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcListVectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcListVectorAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcListVectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcListVectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcMapVectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcMapVectorAccessor.java similarity index 100% rename from 
java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcMapVectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcMapVectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcStructVectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcStructVectorAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcStructVectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcStructVectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcUnionVectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcUnionVectorAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcUnionVectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcUnionVectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBitVectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBitVectorAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBitVectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBitVectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcDecimalVectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcDecimalVectorAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcDecimalVectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcDecimalVectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat4VectorAccessor.java 
b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat4VectorAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat4VectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat4VectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat8VectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat8VectorAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat8VectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat8VectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcNumericGetter.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcNumericGetter.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcNumericGetter.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcNumericGetter.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessor.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessor.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessor.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessor.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandler.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandler.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandler.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandler.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtils.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtils.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtils.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtils.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionConfigImpl.java 
b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionConfigImpl.java similarity index 95% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionConfigImpl.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionConfigImpl.java index 57fd816d9dce3..ac338a85d6292 100644 --- a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionConfigImpl.java +++ b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionConfigImpl.java @@ -17,8 +17,6 @@ package org.apache.arrow.driver.jdbc.utils; -import static java.lang.String.format; - import java.util.Arrays; import java.util.HashMap; import java.util.Map; @@ -201,10 +199,18 @@ public enum ArrowFlightConnectionProperty implements ConnectionProperty { */ public Object get(final Properties properties) { Preconditions.checkNotNull(properties, "Properties cannot be null."); - Preconditions.checkState( - properties.containsKey(camelName) || !required, - format("Required property not provided: <%s>.", this)); - return properties.getOrDefault(camelName, defaultValue); + Object value = properties.get(camelName); + if (value == null) { + value = properties.get(camelName.toLowerCase()); + } + if (required) { + if (value == null) { + throw new IllegalStateException(String.format("Required property not provided: <%s>.", this)); + } + return value; + } else { + return value != null ? value : defaultValue; + } } /** diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/ConnectionWrapper.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/ConnectionWrapper.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/ConnectionWrapper.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/ConnectionWrapper.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/DateTimeUtils.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/DateTimeUtils.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/DateTimeUtils.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/DateTimeUtils.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/FlightStreamQueue.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/FlightStreamQueue.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/FlightStreamQueue.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/FlightStreamQueue.java diff --git 
a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/IntervalStringUtils.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/IntervalStringUtils.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/IntervalStringUtils.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/IntervalStringUtils.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/SqlTypes.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/SqlTypes.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/SqlTypes.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/SqlTypes.java diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/UrlParser.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/UrlParser.java similarity index 53% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/UrlParser.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/UrlParser.java index fbef721793b02..e52251f53918a 100644 --- a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/UrlParser.java +++ b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/UrlParser.java @@ -17,33 +17,49 @@ package org.apache.arrow.driver.jdbc.utils; +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; import java.util.HashMap; import java.util.Map; /** * URL Parser for extracting key values from a connection string. */ -public class UrlParser { +public final class UrlParser { + private UrlParser() { + } + /** - * Parse a url key value parameters. + * Parse URL key value parameters. + * + *
<p>
URL-decodes keys and values. * * @param url {@link String} * @return {@link Map} */ public static Map parse(String url, String separator) { Map resultMap = new HashMap<>(); - String[] keyValues = url.split(separator); + if (url != null) { + String[] keyValues = url.split(separator); - for (String keyValue : keyValues) { - int separatorKey = keyValue.indexOf("="); // Find the first equal sign to split key and value. - String key = keyValue.substring(0, separatorKey); - String value = ""; - if (!keyValue.endsWith("=")) { // Avoid crashes for empty values. - value = keyValue.substring(separatorKey + 1); + for (String keyValue : keyValues) { + try { + int separatorKey = keyValue.indexOf("="); // Find the first equal sign to split key and value. + if (separatorKey != -1) { // Avoid crashes when not finding an equal sign in the property value. + String key = keyValue.substring(0, separatorKey); + key = URLDecoder.decode(key, "UTF-8"); + String value = ""; + if (!keyValue.endsWith("=")) { // Avoid crashes for empty values. + value = keyValue.substring(separatorKey + 1); + } + value = URLDecoder.decode(value, "UTF-8"); + resultMap.put(key, value); + } + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); + } } - resultMap.put(key, value); } - return resultMap; } } diff --git a/java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/VectorSchemaRootTransformer.java b/java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/VectorSchemaRootTransformer.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/VectorSchemaRootTransformer.java rename to java/flight/flight-sql-jdbc-driver/src/main/java/org/apache/arrow/driver/jdbc/utils/VectorSchemaRootTransformer.java diff --git a/java/flight/flight-jdbc-driver/src/main/resources/META-INF/services/java.sql.Driver b/java/flight/flight-sql-jdbc-driver/src/main/resources/META-INF/services/java.sql.Driver similarity index 100% rename from java/flight/flight-jdbc-driver/src/main/resources/META-INF/services/java.sql.Driver rename to java/flight/flight-sql-jdbc-driver/src/main/resources/META-INF/services/java.sql.Driver diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadataTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadataTest.java similarity index 99% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadataTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadataTest.java index 3543dd0313f93..0d930f4c44e1f 100644 --- a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadataTest.java +++ b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadataTest.java @@ -91,6 +91,7 @@ /** * Class containing the tests from the {@link ArrowDatabaseMetadata}. 
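
The UrlParser change above makes the parser URL-decode both keys and values and skip fragments that contain no equal sign. As a minimal standalone sketch of that behaviour (a hypothetical UrlParamsSketch class, not the driver's own UrlParser), under the assumption that keys and values are percent-encoded UTF-8:

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.HashMap;
import java.util.Map;

public final class UrlParamsSketch {
  private UrlParamsSketch() {
  }

  // Split on the separator, take the first '=' as the key/value boundary,
  // URL-decode both sides, and ignore fragments that contain no '='.
  public static Map<String, String> parse(String url, String separator) {
    Map<String, String> result = new HashMap<>();
    if (url == null) {
      return result;
    }
    for (String keyValue : url.split(separator)) {
      int eq = keyValue.indexOf('=');
      if (eq == -1) {
        continue; // no '=' in this fragment, ignore it
      }
      try {
        String key = URLDecoder.decode(keyValue.substring(0, eq), "UTF-8");
        String value = URLDecoder.decode(keyValue.substring(eq + 1), "UTF-8");
        result.put(key, value);
      } catch (UnsupportedEncodingException e) {
        throw new RuntimeException(e); // UTF-8 is always supported
      }
    }
    return result;
  }

  public static void main(String[] args) {
    // "test2%26continue" decodes to the literal key "test2&continue";
    // "flag" has no '=' and is skipped.
    System.out.println(parse("key1=value1&test2%26continue=test2value&flag", "&"));
  }
}
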
*/ +@SuppressWarnings("DoubleBraceInitialization") public class ArrowDatabaseMetadataTest { public static final boolean EXPECTED_MAX_ROW_SIZE_INCLUDES_BLOBS = false; private static final MockFlightSqlProducer FLIGHT_SQL_PRODUCER = new MockFlightSqlProducer(); diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcArrayTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcArrayTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcArrayTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcArrayTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionCookieTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionCookieTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionCookieTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionCookieTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionPoolDataSourceTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionPoolDataSourceTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionPoolDataSourceTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionPoolDataSourceTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcCursorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcCursorTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcCursorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcCursorTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriverTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriverTest.java similarity index 62% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriverTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriverTest.java index d39ec61f3099e..682c20c696ac3 100644 --- a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriverTest.java +++ b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriverTest.java @@ -18,15 +18,16 @@ package org.apache.arrow.driver.jdbc; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertThrows; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; import java.sql.Connection; import java.sql.Driver; import java.sql.DriverManager; import java.sql.SQLException; import java.util.Collection; import java.util.Map; +import 
java.util.Properties; import org.apache.arrow.driver.jdbc.authentication.UserPasswordAuthentication; import org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty; @@ -37,7 +38,6 @@ import org.junit.After; import org.junit.Before; import org.junit.ClassRule; -import org.junit.Ignore; import org.junit.Test; /** @@ -82,8 +82,10 @@ public void tearDown() throws Exception { */ @Test public void testDriverIsRegisteredInDriverManager() throws Exception { - assert DriverManager.getDriver( - "jdbc:arrow-flight://localhost:32010") instanceof ArrowFlightJdbcDriver; + assertTrue(DriverManager.getDriver("jdbc:arrow-flight://localhost:32010") instanceof + ArrowFlightJdbcDriver); + assertTrue(DriverManager.getDriver("jdbc:arrow-flight-sql://localhost:32010") instanceof + ArrowFlightJdbcDriver); } /** @@ -116,8 +118,61 @@ public void testShouldConnectWhenProvidedWithValidUrl() throws Exception { dataSource.getConfig().getHost() + ":" + dataSource.getConfig().getPort() + "?" + "useEncryption=false", - dataSource.getProperties(dataSource.getConfig().getUser(), dataSource.getConfig().getPassword()))) { - assert connection.isValid(300); + dataSource.getProperties(dataSource.getConfig().getUser(), dataSource.getConfig().getPassword()))) { + assertTrue(connection.isValid(300)); + } + try (Connection connection = + driver.connect("jdbc:arrow-flight-sql://" + + dataSource.getConfig().getHost() + ":" + + dataSource.getConfig().getPort() + "?" + + "useEncryption=false", + dataSource.getProperties(dataSource.getConfig().getUser(), dataSource.getConfig().getPassword()))) { + assertTrue(connection.isValid(300)); + } + } + + @Test + public void testConnectWithInsensitiveCasePropertyKeys() throws Exception { + // Get the Arrow Flight JDBC driver by providing a URL with insensitive case property keys. + final Driver driver = new ArrowFlightJdbcDriver(); + + try (Connection connection = + driver.connect("jdbc:arrow-flight://" + + dataSource.getConfig().getHost() + ":" + + dataSource.getConfig().getPort() + "?" + + "UseEncryptiOn=false", + dataSource.getProperties(dataSource.getConfig().getUser(), dataSource.getConfig().getPassword()))) { + assertTrue(connection.isValid(300)); + } + try (Connection connection = + driver.connect("jdbc:arrow-flight-sql://" + + dataSource.getConfig().getHost() + ":" + + dataSource.getConfig().getPort() + "?" + + "UseEncryptiOn=false", + dataSource.getProperties(dataSource.getConfig().getUser(), dataSource.getConfig().getPassword()))) { + assertTrue(connection.isValid(300)); + } + } + + @Test + public void testConnectWithInsensitiveCasePropertyKeys2() throws Exception { + // Get the Arrow Flight JDBC driver by providing a property object with insensitive case keys. 
+ final Driver driver = new ArrowFlightJdbcDriver(); + Properties properties = + dataSource.getProperties(dataSource.getConfig().getUser(), dataSource.getConfig().getPassword()); + properties.put("UseEncryptiOn", "false"); + + try (Connection connection = + driver.connect("jdbc:arrow-flight://" + + dataSource.getConfig().getHost() + ":" + + dataSource.getConfig().getPort(), properties)) { + assertTrue(connection.isValid(300)); + } + try (Connection connection = + driver.connect("jdbc:arrow-flight-sql://" + + dataSource.getConfig().getHost() + ":" + + dataSource.getConfig().getPort(), properties)) { + assertTrue(connection.isValid(300)); } } @@ -151,51 +206,62 @@ public void testShouldThrowExceptionWhenAttemptingToConnectToUrlNoPrefix() throw /** * Tests whether an exception is thrown upon attempting to connect to a * malformed URI. - * - * @throws Exception If an error occurs. */ - @Test(expected = SQLException.class) - @Ignore // TODO Rework this test. - public void testShouldThrowExceptionWhenAttemptingToConnectToUrlNoPort() throws Exception { + @Test + public void testShouldThrowExceptionWhenAttemptingToConnectToUrlNoPort() { final Driver driver = new ArrowFlightJdbcDriver(); - // FIXME This test was passing because the prefix was wrong, NOT because it didn't specify the port. - final String malformedUri = "jdbc:arrow-flight://32010:localhost"; - driver.connect(malformedUri, dataSource.getProperties("flight", "flight123")); + SQLException e = assertThrows(SQLException.class, () -> { + Properties properties = dataSource.getProperties(dataSource.getConfig().getUser(), + dataSource.getConfig().getPassword()); + Connection conn = driver.connect("jdbc:arrow-flight://localhost", properties); + conn.close(); + }); + assertTrue(e.getMessage().contains("URL must have a port")); + e = assertThrows(SQLException.class, () -> { + Properties properties = dataSource.getProperties(dataSource.getConfig().getUser(), + dataSource.getConfig().getPassword()); + Connection conn = driver.connect("jdbc:arrow-flight-sql://localhost", properties); + conn.close(); + }); + assertTrue(e.getMessage().contains("URL must have a port")); } /** * Tests whether an exception is thrown upon attempting to connect to a * malformed URI. - * - * @throws Exception If an error occurs. */ - @Test(expected = SQLException.class) - @Ignore // TODO Rework this test. - public void testShouldThrowExceptionWhenAttemptingToConnectToUrlNoHost() throws Exception { + @Test + public void testShouldThrowExceptionWhenAttemptingToConnectToUrlNoHost() { final Driver driver = new ArrowFlightJdbcDriver(); - // FIXME This test was passing because the prefix was wrong, NOT because it didn't specify the host. 
- final String malformedUri = "jdbc:arrow-flight://32010:localhost"; - driver.connect(malformedUri, dataSource.getProperties(dataSource.getConfig().getUser(), - dataSource.getConfig().getPassword())); + SQLException e = assertThrows(SQLException.class, () -> { + Properties properties = dataSource.getProperties(dataSource.getConfig().getUser(), + dataSource.getConfig().getPassword()); + Connection conn = driver.connect("jdbc:arrow-flight://32010:localhost", properties); + conn.close(); + }); + assertTrue(e.getMessage().contains("URL must have a host")); + + e = assertThrows(SQLException.class, () -> { + Properties properties = dataSource.getProperties(dataSource.getConfig().getUser(), + dataSource.getConfig().getPassword()); + Connection conn = driver.connect("jdbc:arrow-flight-sql://32010:localhost", properties); + conn.close(); + }); + assertTrue(e.getMessage().contains("URL must have a host")); } /** - * Tests whether {@code ArrowFlightJdbcDriverTest#getUrlsArgs} returns the + * Tests whether {@link ArrowFlightJdbcDriver#getUrlsArgs} returns the * correct URL parameters. * * @throws Exception If an error occurs. */ - @SuppressWarnings("unchecked") @Test public void testDriverUrlParsingMechanismShouldReturnTheDesiredArgsFromUrl() throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - - final Method parseUrl = driver.getClass().getDeclaredMethod("getUrlsArgs", String.class); - - parseUrl.setAccessible(true); + final ArrowFlightJdbcDriver driver = new ArrowFlightJdbcDriver(); - final Map parsedArgs = (Map) parseUrl.invoke(driver, - "jdbc:arrow-flight://localhost:2222/?key1=value1&key2=value2&a=b"); + final Map parsedArgs = driver.getUrlsArgs( + "jdbc:arrow-flight-sql://localhost:2222/?key1=value1&key2=value2&a=b"); // Check size == the amount of args provided (scheme not included) assertEquals(5, parsedArgs.size()); @@ -212,17 +278,11 @@ public void testDriverUrlParsingMechanismShouldReturnTheDesiredArgsFromUrl() thr assertEquals(parsedArgs.get("a"), "b"); } - @SuppressWarnings("unchecked") @Test public void testDriverUrlParsingMechanismShouldReturnTheDesiredArgsFromUrlWithSemicolon() throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - - final Method parseUrl = driver.getClass().getDeclaredMethod("getUrlsArgs", String.class); - - parseUrl.setAccessible(true); - - final Map parsedArgs = (Map) parseUrl.invoke(driver, - "jdbc:arrow-flight://localhost:2222/;key1=value1;key2=value2;a=b"); + final ArrowFlightJdbcDriver driver = new ArrowFlightJdbcDriver(); + final Map parsedArgs = driver.getUrlsArgs( + "jdbc:arrow-flight-sql://localhost:2222/;key1=value1;key2=value2;a=b"); // Check size == the amount of args provided (scheme not included) assertEquals(5, parsedArgs.size()); @@ -239,17 +299,11 @@ public void testDriverUrlParsingMechanismShouldReturnTheDesiredArgsFromUrlWithSe assertEquals(parsedArgs.get("a"), "b"); } - @SuppressWarnings("unchecked") @Test public void testDriverUrlParsingMechanismShouldReturnTheDesiredArgsFromUrlWithOneSemicolon() throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - - final Method parseUrl = driver.getClass().getDeclaredMethod("getUrlsArgs", String.class); - - parseUrl.setAccessible(true); - - final Map parsedArgs = (Map) parseUrl.invoke(driver, - "jdbc:arrow-flight://localhost:2222/;key1=value1"); + final ArrowFlightJdbcDriver driver = new ArrowFlightJdbcDriver(); + final Map parsedArgs = driver.getUrlsArgs( + "jdbc:arrow-flight-sql://localhost:2222/;key1=value1"); // Check size == the amount of args 
provided (scheme not included) assertEquals(3, parsedArgs.size()); @@ -268,24 +322,12 @@ public void testDriverUrlParsingMechanismShouldReturnTheDesiredArgsFromUrlWithOn * Tests whether an exception is thrown upon attempting to connect to a * malformed URI. * - * @throws Exception If an error occurs. */ - @SuppressWarnings("unchecked") - @Test(expected = SQLException.class) - public void testDriverUrlParsingMechanismShouldThrowExceptionUponProvidedWithMalformedUrl() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - - final Method getUrlsArgs = driver.getClass().getDeclaredMethod("getUrlsArgs", String.class); - - getUrlsArgs.setAccessible(true); - - try { - final Map parsedArgs = (Map) getUrlsArgs.invoke(driver, - "jdbc:malformed-url-flight://localhost:2222"); - } catch (InvocationTargetException e) { - throw (SQLException) e.getCause(); - } + @Test + public void testDriverUrlParsingMechanismShouldThrowExceptionUponProvidedWithMalformedUrl() { + final ArrowFlightJdbcDriver driver = new ArrowFlightJdbcDriver(); + assertThrows(SQLException.class, () -> driver.getUrlsArgs( + "jdbc:malformed-url-flight://localhost:2222")); } /** @@ -294,17 +336,10 @@ public void testDriverUrlParsingMechanismShouldThrowExceptionUponProvidedWithMal * * @throws Exception If an error occurs. */ - @SuppressWarnings("unchecked") @Test public void testDriverUrlParsingMechanismShouldWorkWithIPAddress() throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - - final Method getUrlsArgs = driver.getClass().getDeclaredMethod("getUrlsArgs", String.class); - - getUrlsArgs.setAccessible(true); - - final Map parsedArgs = - (Map) getUrlsArgs.invoke(driver, "jdbc:arrow-flight://0.0.0.0:2222"); + final ArrowFlightJdbcDriver driver = new ArrowFlightJdbcDriver(); + final Map parsedArgs = driver.getUrlsArgs("jdbc:arrow-flight-sql://0.0.0.0:2222"); // Check size == the amount of args provided (scheme not included) assertEquals(2, parsedArgs.size()); @@ -322,18 +357,12 @@ public void testDriverUrlParsingMechanismShouldWorkWithIPAddress() throws Except * * @throws Exception If an error occurs. 
*/ - @SuppressWarnings("unchecked") @Test public void testDriverUrlParsingMechanismShouldWorkWithEmbeddedEspecialCharacter() throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - - final Method getUrlsArgs = driver.getClass().getDeclaredMethod("getUrlsArgs", String.class); - - getUrlsArgs.setAccessible(true); - - final Map parsedArgs = (Map) getUrlsArgs.invoke(driver, - "jdbc:arrow-flight://0.0.0.0:2222?test1=test1value&test2%26continue=test2value&test3=test3value"); + final ArrowFlightJdbcDriver driver = new ArrowFlightJdbcDriver(); + final Map parsedArgs = driver.getUrlsArgs( + "jdbc:arrow-flight-sql://0.0.0.0:2222?test1=test1value&test2%26continue=test2value&test3=test3value"); // Check size == the amount of args provided (scheme not included) assertEquals(5, parsedArgs.size()); @@ -349,5 +378,4 @@ public void testDriverUrlParsingMechanismShouldWorkWithEmbeddedEspecialCharacter assertEquals(parsedArgs.get("test2&continue"), "test2value"); assertEquals(parsedArgs.get("test3"), "test3value"); } - } diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFactoryTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFactoryTest.java similarity index 98% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFactoryTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFactoryTest.java index 4cd85bcd14efa..c482169852e5e 100644 --- a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFactoryTest.java +++ b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFactoryTest.java @@ -81,7 +81,7 @@ public void testShouldBeAbleToEstablishAConnectionSuccessfully() throws Exceptio ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), false)); try (Connection connection = factory.newConnection(driver, constructor.newInstance(), - "jdbc:arrow-flight://localhost:32010", properties)) { + "jdbc:arrow-flight-sql://localhost:32010", properties)) { assert connection.isValid(300); } } diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcTimeTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcTimeTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcTimeTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcTimeTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightPreparedStatementTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightPreparedStatementTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightPreparedStatementTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightPreparedStatementTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightStatementExecuteTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightStatementExecuteTest.java similarity index 100% rename from 
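
The reworked ArrowFlightJdbcDriverTest above exercises the new jdbc:arrow-flight-sql URL prefix and mixed-case property keys. A hedged usage sketch mirroring those tests, where the host, port and credentials are placeholders and not part of this patch:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.Properties;

public class FlightSqlJdbcUsageSketch {
  public static void main(String[] args) throws SQLException {
    Properties props = new Properties();
    props.put("user", "flight");         // placeholder credentials
    props.put("password", "flight123");
    props.put("UseEncryptiOn", "false"); // mixed-case key, as exercised by the new tests

    // After this patch the driver also answers to the "jdbc:arrow-flight-sql" prefix.
    try (Connection connection =
             DriverManager.getConnection("jdbc:arrow-flight-sql://localhost:32010", props)) {
      System.out.println("connection valid: " + connection.isValid(300));
    }
  }
}
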
java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightStatementExecuteTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightStatementExecuteTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightStatementExecuteUpdateTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightStatementExecuteUpdateTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightStatementExecuteUpdateTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightStatementExecuteUpdateTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTest.java similarity index 94% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTest.java index 2a530b30369ef..6fe7ba7129829 100644 --- a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTest.java +++ b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTest.java @@ -94,7 +94,7 @@ public void testUnencryptedConnectionShouldOpenSuccessfullyWhenProvidedValidCred properties.put("useEncryption", false); try (Connection connection = DriverManager.getConnection( - "jdbc:arrow-flight://" + FLIGHT_SERVER_TEST_RULE.getHost() + ":" + + "jdbc:arrow-flight-sql://" + FLIGHT_SERVER_TEST_RULE.getHost() + ":" + FLIGHT_SERVER_TEST_RULE.getPort(), properties)) { assert connection.isValid(300); } @@ -112,7 +112,7 @@ public void testUnencryptedConnectionWithEmptyHost() properties.put("user", userTest); properties.put("password", passTest); - final String invalidUrl = "jdbc:arrow-flight://"; + final String invalidUrl = "jdbc:arrow-flight-sql://"; DriverManager.getConnection(invalidUrl, properties); } @@ -156,7 +156,7 @@ public void testUnencryptedConnectionProvidingInvalidPort() passTest); properties.put(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), false); - final String invalidUrl = "jdbc:arrow-flight://" + FLIGHT_SERVER_TEST_RULE.getHost() + + final String invalidUrl = "jdbc:arrow-flight-sql://" + FLIGHT_SERVER_TEST_RULE.getHost() + ":" + 65537; DriverManager.getConnection(invalidUrl, properties); @@ -195,7 +195,7 @@ public void testUnencryptedConnectionShouldOpenSuccessfullyWithoutAuthentication properties.put(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), false); try (Connection connection = DriverManager - .getConnection("jdbc:arrow-flight://localhost:32010", properties)) { + .getConnection("jdbc:arrow-flight-sql://localhost:32010", properties)) { assert connection.isValid(300); } } @@ -222,7 +222,7 @@ public void testUnencryptedConnectionShouldThrowExceptionWhenProvidedWithInvalid properties.put(ArrowFlightConnectionProperty.PASSWORD.camelName(), "invalidPassword"); - try (Connection ignored = DriverManager.getConnection("jdbc:arrow-flight://localhost:32010", + try (Connection ignored = DriverManager.getConnection("jdbc:arrow-flight-sql://localhost:32010", properties)) { Assert.fail(); } @@ -241,7 +241,7 @@ public void testTLSConnectionPropertyFalseCorrectCastUrlWithDriverManager() thro Connection connection 
= DriverManager.getConnection( String.format( - "jdbc:arrow-flight://localhost:%s?user=%s&password=%s&useEncryption=false", + "jdbc:arrow-flight-sql://localhost:%s?user=%s&password=%s&useEncryption=false", FLIGHT_SERVER_TEST_RULE.getPort(), userTest, passTest)); @@ -271,7 +271,7 @@ public void testTLSConnectionPropertyFalseCorrectCastUrlAndPropertiesUsingSetPro Connection connection = DriverManager.getConnection( String.format( - "jdbc:arrow-flight://localhost:%s", + "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_RULE.getPort()), properties); Assert.assertTrue(connection.isValid(0)); @@ -299,7 +299,7 @@ public void testTLSConnectionPropertyFalseCorrectCastUrlAndPropertiesUsingPutWit Connection connection = DriverManager.getConnection( String.format( - "jdbc:arrow-flight://localhost:%s", + "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_RULE.getPort()), properties); Assert.assertTrue(connection.isValid(0)); @@ -320,7 +320,7 @@ public void testTLSConnectionPropertyFalseIntegerCorrectCastUrlWithDriverManager Connection connection = DriverManager.getConnection( String.format( - "jdbc:arrow-flight://localhost:%s?user=%s&password=%s&useEncryption=0", + "jdbc:arrow-flight-sql://localhost:%s?user=%s&password=%s&useEncryption=0", FLIGHT_SERVER_TEST_RULE.getPort(), userTest, passTest)); @@ -350,7 +350,7 @@ public void testTLSConnectionPropertyFalseIntegerCorrectCastUrlAndPropertiesUsin Connection connection = DriverManager.getConnection( String.format( - "jdbc:arrow-flight://localhost:%s", + "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_RULE.getPort()), properties); Assert.assertTrue(connection.isValid(0)); @@ -379,7 +379,7 @@ public void testTLSConnectionPropertyFalseIntegerCorrectCastUrlAndPropertiesUsin Connection connection = DriverManager.getConnection( String.format( - "jdbc:arrow-flight://localhost:%s", + "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_RULE.getPort()), properties); Assert.assertTrue(connection.isValid(0)); @@ -400,7 +400,7 @@ public void testThreadPoolSizeConnectionPropertyCorrectCastUrlWithDriverManager( Connection connection = DriverManager.getConnection( String.format( - "jdbc:arrow-flight://localhost:%s?user=%s&password=%s&threadPoolSize=1&useEncryption=%s", + "jdbc:arrow-flight-sql://localhost:%s?user=%s&password=%s&threadPoolSize=1&useEncryption=%s", FLIGHT_SERVER_TEST_RULE.getPort(), userTest, passTest, @@ -432,7 +432,7 @@ public void testThreadPoolSizeConnectionPropertyCorrectCastUrlAndPropertiesUsing Connection connection = DriverManager.getConnection( String.format( - "jdbc:arrow-flight://localhost:%s", + "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_RULE.getPort()), properties); Assert.assertTrue(connection.isValid(0)); @@ -462,7 +462,7 @@ public void testThreadPoolSizeConnectionPropertyCorrectCastUrlAndPropertiesUsing Connection connection = DriverManager.getConnection( String.format( - "jdbc:arrow-flight://localhost:%s", + "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_RULE.getPort()), properties); Assert.assertTrue(connection.isValid(0)); @@ -483,7 +483,7 @@ public void testPasswordConnectionPropertyIntegerCorrectCastUrlWithDriverManager Connection connection = DriverManager.getConnection( String.format( - "jdbc:arrow-flight://localhost:%s?user=%s&password=%s&useEncryption=%s", + "jdbc:arrow-flight-sql://localhost:%s?user=%s&password=%s&useEncryption=%s", FLIGHT_SERVER_TEST_RULE.getPort(), userTest, passTest, @@ -514,7 +514,7 @@ public void 
testPasswordConnectionPropertyIntegerCorrectCastUrlAndPropertiesUsin Connection connection = DriverManager.getConnection( String.format( - "jdbc:arrow-flight://localhost:%s", + "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_RULE.getPort()), properties); Assert.assertTrue(connection.isValid(0)); @@ -543,7 +543,7 @@ public void testPasswordConnectionPropertyIntegerCorrectCastUrlAndPropertiesUsin Connection connection = DriverManager.getConnection( String.format( - "jdbc:arrow-flight://localhost:%s", + "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_RULE.getPort()), properties); Assert.assertTrue(connection.isValid(0)); diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTlsTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTlsTest.java similarity index 93% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTlsTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTlsTest.java index 2d976a4d02d13..a5f9938f04bcb 100644 --- a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTlsTest.java +++ b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTlsTest.java @@ -19,6 +19,8 @@ import static org.junit.Assert.assertNotNull; +import java.net.URLEncoder; +import java.nio.file.Paths; import java.sql.Connection; import java.sql.Driver; import java.sql.DriverManager; @@ -33,6 +35,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.util.AutoCloseables; +import org.apache.arrow.util.Preconditions; import org.apache.calcite.avatica.org.apache.http.auth.UsernamePasswordCredentials; import org.junit.After; import org.junit.Assert; @@ -68,13 +71,17 @@ public class ConnectionTlsTest { .build(); } - private final String trustStorePath = getClass().getResource("/keys/keyStore.jks").getPath(); - private final String noCertificateKeyStorePath = getClass().getResource("/keys/noCertificate.jks").getPath(); + private String trustStorePath; + private String noCertificateKeyStorePath; private final String trustStorePass = "flight"; private BufferAllocator allocator; @Before public void setUp() throws Exception { + trustStorePath = Paths.get( + Preconditions.checkNotNull(getClass().getResource("/keys/keyStore.jks")).toURI()).toString(); + noCertificateKeyStorePath = Paths.get( + Preconditions.checkNotNull(getClass().getResource("/keys/noCertificate.jks")).toURI()).toString(); allocator = new RootAllocator(Long.MAX_VALUE); } @@ -288,15 +295,15 @@ public void testTLSConnectionPropertyTrueCorrectCastUrlWithDriverManager() throw final Connection connection = DriverManager.getConnection( String.format( - "jdbc:arrow-flight://localhost:%s?user=%s&password=%s" + + "jdbc:arrow-flight-sql://localhost:%s?user=%s&password=%s" + "&useEncryption=true&useSystemTrustStore=false&%s=%s&%s=%s", FLIGHT_SERVER_TEST_RULE.getPort(), userTest, passTest, ArrowFlightConnectionProperty.TRUST_STORE.camelName(), - trustStorePath, + URLEncoder.encode(trustStorePath, "UTF-8"), ArrowFlightConnectionProperty.TRUST_STORE_PASSWORD.camelName(), - trustStorePass)); + URLEncoder.encode(trustStorePass, "UTF-8"))); Assert.assertTrue(connection.isValid(0)); connection.close(); } @@ -324,7 +331,7 @@ public void testTLSConnectionPropertyTrueCorrectCastUrlAndPropertiesUsingSetProp final Connection connection = 
DriverManager.getConnection( String.format( - "jdbc:arrow-flight://localhost:%s", + "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_RULE.getPort()), properties); Assert.assertTrue(connection.isValid(0)); @@ -354,7 +361,7 @@ public void testTLSConnectionPropertyTrueCorrectCastUrlAndPropertiesUsingPutWith final Connection connection = DriverManager.getConnection( String.format( - "jdbc:arrow-flight://localhost:%s", + "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_RULE.getPort()), properties); Assert.assertTrue(connection.isValid(0)); @@ -375,14 +382,15 @@ public void testTLSConnectionPropertyTrueIntegerCorrectCastUrlWithDriverManager( final Connection connection = DriverManager.getConnection( String.format( - "jdbc:arrow-flight://localhost:%s?user=%s&password=%s&useEncryption=1&useSystemTrustStore=0&%s=%s&%s=%s", + "jdbc:arrow-flight-sql://localhost:%s?user=%s&password=%s" + + "&useEncryption=1&useSystemTrustStore=0&%s=%s&%s=%s", FLIGHT_SERVER_TEST_RULE.getPort(), userTest, passTest, ArrowFlightConnectionProperty.TRUST_STORE.camelName(), - trustStorePath, + URLEncoder.encode(trustStorePath, "UTF-8"), ArrowFlightConnectionProperty.TRUST_STORE_PASSWORD.camelName(), - trustStorePass)); + URLEncoder.encode(trustStorePass, "UTF-8"))); Assert.assertTrue(connection.isValid(0)); connection.close(); } @@ -409,7 +417,7 @@ public void testTLSConnectionPropertyTrueIntegerCorrectCastUrlAndPropertiesUsing properties.setProperty(ArrowFlightConnectionProperty.USE_SYSTEM_TRUST_STORE.camelName(), "0"); final Connection connection = DriverManager.getConnection( - String.format("jdbc:arrow-flight://localhost:%s", FLIGHT_SERVER_TEST_RULE.getPort()), + String.format("jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_RULE.getPort()), properties); Assert.assertTrue(connection.isValid(0)); connection.close(); @@ -437,7 +445,7 @@ public void testTLSConnectionPropertyTrueIntegerCorrectCastUrlAndPropertiesUsing properties.put(ArrowFlightConnectionProperty.TRUST_STORE_PASSWORD.camelName(), trustStorePass); final Connection connection = DriverManager.getConnection( - String.format("jdbc:arrow-flight://localhost:%s", + String.format("jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_RULE.getPort()), properties); Assert.assertTrue(connection.isValid(0)); diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/FlightServerTestRule.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/FlightServerTestRule.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/FlightServerTestRule.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/FlightServerTestRule.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ResultSetMetadataTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ResultSetMetadataTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ResultSetMetadataTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ResultSetMetadataTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ResultSetTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ResultSetTest.java similarity index 97% rename from 
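
ConnectionTlsTest above now percent-encodes the trust-store path and password before embedding them in the connection URL. A small illustrative sketch of that encoding step; the path, password, and the trustStore/trustStorePassword parameter names are assumptions here (the tests resolve the names via ArrowFlightConnectionProperty.camelName()):

import java.net.URLEncoder;

public class TrustStoreUrlSketch {
  public static void main(String[] args) throws Exception {
    // Placeholder values; real paths often contain spaces and back-slashes
    // that are not legal in a URL query string until percent-encoded.
    String trustStorePath = "C:\\Program Files\\certs\\keyStore.jks";
    String trustStorePass = "flight";

    String url = String.format(
        "jdbc:arrow-flight-sql://localhost:%d"
            + "?useEncryption=true&useSystemTrustStore=false&trustStore=%s&trustStorePassword=%s",
        32010,
        URLEncoder.encode(trustStorePath, "UTF-8"),
        URLEncoder.encode(trustStorePass, "UTF-8"));
    System.out.println(url);
  }
}
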
java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ResultSetTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ResultSetTest.java index a3e40c743e570..33473b6fe2baa 100644 --- a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ResultSetTest.java +++ b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ResultSetTest.java @@ -19,6 +19,9 @@ import static java.lang.String.format; import static java.util.Collections.synchronizedSet; +import static org.hamcrest.CoreMatchers.allOf; +import static org.hamcrest.CoreMatchers.anyOf; +import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.CoreMatchers.instanceOf; import static org.hamcrest.CoreMatchers.is; import static org.junit.Assert.assertEquals; @@ -329,7 +332,9 @@ public void testShouldInterruptFlightStreamsIfQueryIsCancelledMidProcessingForTi .reduce(StringBuilder::append) .orElseThrow(IllegalStateException::new) .toString(), - is(format("Error while executing SQL \"%s\": Query canceled", query))); + anyOf(is(format("Error while executing SQL \"%s\": Query canceled", query)), + allOf(containsString(format("Error while executing SQL \"%s\"", query)), + containsString("CANCELLED")))); } } diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/TokenAuthenticationTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/TokenAuthenticationTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/TokenAuthenticationTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/TokenAuthenticationTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorFactoryTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorFactoryTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorFactoryTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorFactoryTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/ArrowFlightJdbcNullVectorAccessorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/ArrowFlightJdbcNullVectorAccessorTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/ArrowFlightJdbcNullVectorAccessorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/ArrowFlightJdbcNullVectorAccessorTest.java diff --git 
a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/binary/ArrowFlightJdbcBinaryVectorAccessorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/binary/ArrowFlightJdbcBinaryVectorAccessorTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/binary/ArrowFlightJdbcBinaryVectorAccessorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/binary/ArrowFlightJdbcBinaryVectorAccessorTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorAccessorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorAccessorTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorAccessorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorAccessorTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDurationVectorAccessorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDurationVectorAccessorTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDurationVectorAccessorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDurationVectorAccessorTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcIntervalVectorAccessorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcIntervalVectorAccessorTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcIntervalVectorAccessorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcIntervalVectorAccessorTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorAccessorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorAccessorTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorAccessorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorAccessorTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorAccessorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorAccessorTest.java similarity index 100% rename from 
java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorAccessorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorAccessorTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcListAccessorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcListAccessorTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcListAccessorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcListAccessorTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcUnionVectorAccessorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcUnionVectorAccessorTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcUnionVectorAccessorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcUnionVectorAccessorTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcDenseUnionVectorAccessorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcDenseUnionVectorAccessorTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcDenseUnionVectorAccessorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcDenseUnionVectorAccessorTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcMapVectorAccessorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcMapVectorAccessorTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcMapVectorAccessorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcMapVectorAccessorTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcStructVectorAccessorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcStructVectorAccessorTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcStructVectorAccessorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcStructVectorAccessorTest.java diff --git 
a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcUnionVectorAccessorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcUnionVectorAccessorTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcUnionVectorAccessorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcUnionVectorAccessorTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessorTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessorTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessorUnitTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessorUnitTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessorUnitTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessorUnitTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBitVectorAccessorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBitVectorAccessorTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBitVectorAccessorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBitVectorAccessorTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcDecimalVectorAccessorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcDecimalVectorAccessorTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcDecimalVectorAccessorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcDecimalVectorAccessorTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat4VectorAccessorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat4VectorAccessorTest.java similarity index 100% rename from 
java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat4VectorAccessorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat4VectorAccessorTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat8VectorAccessorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat8VectorAccessorTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat8VectorAccessorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat8VectorAccessorTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessorTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessorTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessorTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessorTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/authentication/Authentication.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/authentication/Authentication.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/authentication/Authentication.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/authentication/Authentication.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/authentication/TokenAuthentication.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/authentication/TokenAuthentication.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/authentication/TokenAuthentication.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/authentication/TokenAuthentication.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/authentication/UserPasswordAuthentication.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/authentication/UserPasswordAuthentication.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/authentication/UserPasswordAuthentication.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/authentication/UserPasswordAuthentication.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java rename 
to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/AccessorTestUtils.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/AccessorTestUtils.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/AccessorTestUtils.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/AccessorTestUtils.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionConfigImplTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionConfigImplTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionConfigImplTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionConfigImplTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionPropertyTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionPropertyTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionPropertyTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionPropertyTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ConnectionWrapperTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ConnectionWrapperTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ConnectionWrapperTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ConnectionWrapperTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ConvertUtilsTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ConvertUtilsTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ConvertUtilsTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ConvertUtilsTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/CoreMockedSqlProducers.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/CoreMockedSqlProducers.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/CoreMockedSqlProducers.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/CoreMockedSqlProducers.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/DateTimeUtilsTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/DateTimeUtilsTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/DateTimeUtilsTest.java rename to 
java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/DateTimeUtilsTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/FlightSqlTestCertificates.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/FlightSqlTestCertificates.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/FlightSqlTestCertificates.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/FlightSqlTestCertificates.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/FlightStreamQueueTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/FlightStreamQueueTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/FlightStreamQueueTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/FlightStreamQueueTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/MockFlightSqlProducer.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/MockFlightSqlProducer.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/MockFlightSqlProducer.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/MockFlightSqlProducer.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ResultSetTestUtils.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ResultSetTestUtils.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ResultSetTestUtils.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ResultSetTestUtils.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/RootAllocatorTestRule.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/RootAllocatorTestRule.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/RootAllocatorTestRule.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/RootAllocatorTestRule.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/SqlTypesTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/SqlTypesTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/SqlTypesTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/SqlTypesTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ThrowableAssertionUtils.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ThrowableAssertionUtils.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ThrowableAssertionUtils.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/ThrowableAssertionUtils.java diff 
--git a/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/UrlParserTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/UrlParserTest.java new file mode 100644 index 0000000000000..4e764ab322c69 --- /dev/null +++ b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/UrlParserTest.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.driver.jdbc.utils; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; + +import java.util.Map; + +import org.junit.jupiter.api.Test; + +class UrlParserTest { + @Test + void parse() { + final Map parsed = UrlParser.parse("foo=bar&123=456", "&"); + assertEquals(parsed.get("foo"), "bar"); + assertEquals(parsed.get("123"), "456"); + } + + @Test + void parseEscaped() { + final Map parsed = UrlParser.parse("foo=bar%26&%26123=456", "&"); + assertEquals(parsed.get("foo"), "bar&"); + assertEquals(parsed.get("&123"), "456"); + } + + @Test + void parseEmpty() { + final Map parsed = UrlParser.parse("a=&b&foo=bar&123=456", "&"); + assertEquals(parsed.get("a"), ""); + assertNull(parsed.get("b")); + assertEquals(parsed.get("foo"), "bar"); + assertEquals(parsed.get("123"), "456"); + } +} diff --git a/java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/VectorSchemaRootTransformerTest.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/VectorSchemaRootTransformerTest.java similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/VectorSchemaRootTransformerTest.java rename to java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/utils/VectorSchemaRootTransformerTest.java diff --git a/java/flight/flight-jdbc-driver/src/test/resources/keys/keyStore.jks b/java/flight/flight-sql-jdbc-driver/src/test/resources/keys/keyStore.jks similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/resources/keys/keyStore.jks rename to java/flight/flight-sql-jdbc-driver/src/test/resources/keys/keyStore.jks diff --git a/java/flight/flight-jdbc-driver/src/test/resources/keys/noCertificate.jks b/java/flight/flight-sql-jdbc-driver/src/test/resources/keys/noCertificate.jks similarity index 100% rename from java/flight/flight-jdbc-driver/src/test/resources/keys/noCertificate.jks rename to java/flight/flight-sql-jdbc-driver/src/test/resources/keys/noCertificate.jks diff --git a/java/flight/flight-sql-jdbc-driver/src/test/resources/logback.xml b/java/flight/flight-sql-jdbc-driver/src/test/resources/logback.xml new file mode 100644 index 0000000000000..ce66f8d82acda --- 
/dev/null +++ b/java/flight/flight-sql-jdbc-driver/src/test/resources/logback.xml @@ -0,0 +1,27 @@ + + + + + + + + %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + diff --git a/java/flight/flight-sql/pom.xml b/java/flight/flight-sql/pom.xml index 8af1fa7086278..1ccbe223641ca 100644 --- a/java/flight/flight-sql/pom.xml +++ b/java/flight/flight-sql/pom.xml @@ -14,7 +14,7 @@ arrow-flight org.apache.arrow - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT ../pom.xml diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlClient.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlClient.java index 221b9d0c76047..f1f07a1588f57 100644 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlClient.java +++ b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlClient.java @@ -38,7 +38,6 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.nio.channels.Channels; -import java.sql.SQLException; import java.util.Arrays; import java.util.Collections; import java.util.Iterator; @@ -98,6 +97,16 @@ public FlightInfo execute(final String query, final CallOption... options) { return client.getInfo(descriptor, options); } + /** + * Get the schema of the result set of a query. + */ + public SchemaResult getExecuteSchema(final String query, final CallOption... options) { + final CommandStatementQuery.Builder builder = CommandStatementQuery.newBuilder(); + builder.setQuery(query); + final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(builder.build()).toByteArray()); + return client.getSchema(descriptor, options); + } + /** * Execute an update query on the server. * @@ -138,6 +147,17 @@ public FlightInfo getCatalogs(final CallOption... options) { return client.getInfo(descriptor, options); } + /** + * Get the schema of {@link #getCatalogs(CallOption...)} from the server. + * + *
<p>
Should be identical to {@link FlightSqlProducer.Schemas#GET_CATALOGS_SCHEMA}. + */ + public SchemaResult getCatalogsSchema(final CallOption... options) { + final CommandGetCatalogs command = CommandGetCatalogs.getDefaultInstance(); + final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(command).toByteArray()); + return client.getSchema(descriptor, options); + } + /** * Request a list of schemas. * @@ -161,6 +181,17 @@ public FlightInfo getSchemas(final String catalog, final String dbSchemaFilterPa return client.getInfo(descriptor, options); } + /** + * Get the schema of {@link #getSchemas(String, String, CallOption...)} from the server. + * + *
<p>
Should be identical to {@link FlightSqlProducer.Schemas#GET_SCHEMAS_SCHEMA}. + */ + public SchemaResult getSchemasSchema(final CallOption... options) { + final CommandGetDbSchemas command = CommandGetDbSchemas.getDefaultInstance(); + final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(command).toByteArray()); + return client.getSchema(descriptor, options); + } + /** * Get schema for a stream. * @@ -232,6 +263,17 @@ public FlightInfo getSqlInfo(final Iterable info, final CallOption... o return client.getInfo(descriptor, options); } + /** + * Get the schema of {@link #getSqlInfo(SqlInfo...)} from the server. + * + *
<p>
Should be identical to {@link FlightSqlProducer.Schemas#GET_SQL_INFO_SCHEMA}. + */ + public SchemaResult getSqlInfoSchema(final CallOption... options) { + final CommandGetSqlInfo command = CommandGetSqlInfo.getDefaultInstance(); + final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(command).toByteArray()); + return client.getSchema(descriptor, options); + } + /** * Request the information about the data types supported related to * a filter data type. @@ -262,6 +304,17 @@ public FlightInfo getXdbcTypeInfo(final CallOption... options) { return client.getInfo(descriptor, options); } + /** + * Get the schema of {@link #getXdbcTypeInfo(CallOption...)} from the server. + * + *
<p>
Should be identical to {@link FlightSqlProducer.Schemas#GET_TYPE_INFO_SCHEMA}. + */ + public SchemaResult getXdbcTypeInfoSchema(final CallOption... options) { + final CommandGetXdbcTypeInfo command = CommandGetXdbcTypeInfo.getDefaultInstance(); + final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(command).toByteArray()); + return client.getSchema(descriptor, options); + } + /** * Request a list of tables. * @@ -299,6 +352,18 @@ public FlightInfo getTables(final String catalog, final String dbSchemaFilterPat return client.getInfo(descriptor, options); } + /** + * Get the schema of {@link #getTables(String, String, String, List, boolean, CallOption...)} from the server. + * + *
<p>
Should be identical to {@link FlightSqlProducer.Schemas#GET_TABLES_SCHEMA} or + * {@link FlightSqlProducer.Schemas#GET_TABLES_SCHEMA_NO_SCHEMA}. + */ + public SchemaResult getTablesSchema(boolean includeSchema, final CallOption... options) { + final CommandGetTables command = CommandGetTables.newBuilder().setIncludeSchema(includeSchema).build(); + final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(command).toByteArray()); + return client.getSchema(descriptor, options); + } + /** * Request the primary keys for a table. * @@ -324,6 +389,17 @@ public FlightInfo getPrimaryKeys(final TableRef tableRef, final CallOption... op return client.getInfo(descriptor, options); } + /** + * Get the schema of {@link #getPrimaryKeys(TableRef, CallOption...)} from the server. + * + *
<p>
Should be identical to {@link FlightSqlProducer.Schemas#GET_PRIMARY_KEYS_SCHEMA}. + */ + public SchemaResult getPrimaryKeysSchema(final CallOption... options) { + final CommandGetPrimaryKeys command = CommandGetPrimaryKeys.getDefaultInstance(); + final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(command).toByteArray()); + return client.getSchema(descriptor, options); + } + /** * Retrieves a description about the foreign key columns that reference the primary key columns of the given table. * @@ -351,6 +427,17 @@ public FlightInfo getExportedKeys(final TableRef tableRef, final CallOption... o return client.getInfo(descriptor, options); } + /** + * Get the schema of {@link #getExportedKeys(TableRef, CallOption...)} from the server. + * + *
<p>
Should be identical to {@link FlightSqlProducer.Schemas#GET_EXPORTED_KEYS_SCHEMA}. + */ + public SchemaResult getExportedKeysSchema(final CallOption... options) { + final CommandGetExportedKeys command = CommandGetExportedKeys.getDefaultInstance(); + final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(command).toByteArray()); + return client.getSchema(descriptor, options); + } + /** * Retrieves the foreign key columns for the given table. * @@ -379,6 +466,17 @@ public FlightInfo getImportedKeys(final TableRef tableRef, return client.getInfo(descriptor, options); } + /** + * Get the schema of {@link #getImportedKeys(TableRef, CallOption...)} from the server. + * + *
<p>
Should be identical to {@link FlightSqlProducer.Schemas#GET_IMPORTED_KEYS_SCHEMA}. + */ + public SchemaResult getImportedKeysSchema(final CallOption... options) { + final CommandGetImportedKeys command = CommandGetImportedKeys.getDefaultInstance(); + final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(command).toByteArray()); + return client.getSchema(descriptor, options); + } + /** * Retrieves a description of the foreign key columns that reference the given table's * primary key columns (the foreign keys exported by a table). @@ -418,6 +516,17 @@ public FlightInfo getCrossReference(final TableRef pkTableRef, return client.getInfo(descriptor, options); } + /** + * Get the schema of {@link #getCrossReference(TableRef, TableRef, CallOption...)} from the server. + * + *
<p>
Should be identical to {@link FlightSqlProducer.Schemas#GET_CROSS_REFERENCE_SCHEMA}. + */ + public SchemaResult getCrossReferenceSchema(final CallOption... options) { + final CommandGetCrossReference command = CommandGetCrossReference.getDefaultInstance(); + final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(command).toByteArray()); + return client.getSchema(descriptor, options); + } + /** * Request a list of table types. * @@ -430,6 +539,17 @@ public FlightInfo getTableTypes(final CallOption... options) { return client.getInfo(descriptor, options); } + /** + * Get the schema of {@link #getTableTypes(CallOption...)} from the server. + * + *
<p>
Should be identical to {@link FlightSqlProducer.Schemas#GET_TABLE_TYPES_SCHEMA}. + */ + public SchemaResult getTableTypesSchema(final CallOption... options) { + final CommandGetTableTypes command = CommandGetTableTypes.getDefaultInstance(); + final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(command).toByteArray()); + return client.getSchema(descriptor, options); + } + /** * Create a prepared statement on the server. * @@ -442,12 +562,8 @@ public PreparedStatement prepare(final String query, final CallOption... options } @Override - public void close() throws SQLException { - try { - AutoCloseables.close(client); - } catch (final Exception e) { - throw new SQLException(e); - } + public void close() throws Exception { + AutoCloseables.close(client); } /** @@ -539,6 +655,20 @@ public Schema getParameterSchema() { return parameterSchema; } + /** + * Get the schema of the result set (should be identical to {@link #getResultSetSchema()}). + */ + public SchemaResult fetchSchema(CallOption... options) { + checkOpen(); + + final FlightDescriptor descriptor = FlightDescriptor + .command(Any.pack(CommandPreparedStatementQuery.newBuilder() + .setPreparedStatementHandle(preparedStatementResult.getPreparedStatementHandle()) + .build()) + .toByteArray()); + return client.getSchema(descriptor, options); + } + private Schema deserializeSchema(final ByteString bytes) { try { return bytes.isEmpty() ? @@ -557,7 +687,7 @@ private Schema deserializeSchema(final ByteString bytes) { * @param options RPC-layer hints for this call. * @return a FlightInfo object representing the stream(s) to fetch. */ - public FlightInfo execute(final CallOption... options) throws SQLException { + public FlightInfo execute(final CallOption... options) { checkOpen(); final FlightDescriptor descriptor = FlightDescriptor diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlProducer.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlProducer.java index c617c6a03eec9..4226ec9e228cf 100644 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlProducer.java +++ b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlProducer.java @@ -147,26 +147,32 @@ default SchemaResult getSchema(CallContext context, FlightDescriptor descriptor) if (command.is(CommandStatementQuery.class)) { return getSchemaStatement( FlightSqlUtils.unpackOrThrow(command, CommandStatementQuery.class), context, descriptor); + } else if (command.is(CommandPreparedStatementQuery.class)) { + return getSchemaPreparedStatement( + FlightSqlUtils.unpackOrThrow(command, CommandPreparedStatementQuery.class), context, descriptor); } else if (command.is(CommandGetCatalogs.class)) { return new SchemaResult(Schemas.GET_CATALOGS_SCHEMA); + } else if (command.is(CommandGetCrossReference.class)) { + return new SchemaResult(Schemas.GET_CROSS_REFERENCE_SCHEMA); } else if (command.is(CommandGetDbSchemas.class)) { return new SchemaResult(Schemas.GET_SCHEMAS_SCHEMA); + } else if (command.is(CommandGetExportedKeys.class)) { + return new SchemaResult(Schemas.GET_EXPORTED_KEYS_SCHEMA); + } else if (command.is(CommandGetImportedKeys.class)) { + return new SchemaResult(Schemas.GET_IMPORTED_KEYS_SCHEMA); + } else if (command.is(CommandGetPrimaryKeys.class)) { + return new SchemaResult(Schemas.GET_PRIMARY_KEYS_SCHEMA); } else if (command.is(CommandGetTables.class)) { - return new SchemaResult(Schemas.GET_TABLES_SCHEMA); + if (FlightSqlUtils.unpackOrThrow(command, 
CommandGetTables.class).getIncludeSchema()) { + return new SchemaResult(Schemas.GET_TABLES_SCHEMA); + } + return new SchemaResult(Schemas.GET_TABLES_SCHEMA_NO_SCHEMA); } else if (command.is(CommandGetTableTypes.class)) { return new SchemaResult(Schemas.GET_TABLE_TYPES_SCHEMA); } else if (command.is(CommandGetSqlInfo.class)) { return new SchemaResult(Schemas.GET_SQL_INFO_SCHEMA); } else if (command.is(CommandGetXdbcTypeInfo.class)) { return new SchemaResult(Schemas.GET_TYPE_INFO_SCHEMA); - } else if (command.is(CommandGetPrimaryKeys.class)) { - return new SchemaResult(Schemas.GET_PRIMARY_KEYS_SCHEMA); - } else if (command.is(CommandGetImportedKeys.class)) { - return new SchemaResult(Schemas.GET_IMPORTED_KEYS_SCHEMA); - } else if (command.is(CommandGetExportedKeys.class)) { - return new SchemaResult(Schemas.GET_EXPORTED_KEYS_SCHEMA); - } else if (command.is(CommandGetCrossReference.class)) { - return new SchemaResult(Schemas.GET_CROSS_REFERENCE_SCHEMA); } throw CallStatus.INVALID_ARGUMENT.withDescription("Invalid command provided.").toRuntimeException(); @@ -336,16 +342,31 @@ FlightInfo getFlightInfoPreparedStatement(CommandPreparedStatementQuery command, CallContext context, FlightDescriptor descriptor); /** - * Gets schema about a particular SQL query based data stream. + * Get the schema of the result set of a query. * - * @param command The sql command to generate the data stream. + * @param command The SQL query. * @param context Per-call context. * @param descriptor The descriptor identifying the data stream. - * @return Schema for the stream. + * @return the schema of the result set. */ SchemaResult getSchemaStatement(CommandStatementQuery command, CallContext context, FlightDescriptor descriptor); + /** + * Get the schema of the result set of a prepared statement. + * + * @param command The prepared statement handle. + * @param context Per-call context. + * @param descriptor The descriptor identifying the data stream. + * @return the schema of the result set. + */ + default SchemaResult getSchemaPreparedStatement(CommandPreparedStatementQuery command, CallContext context, + FlightDescriptor descriptor) { + throw CallStatus.UNIMPLEMENTED + .withDescription("GetSchema with CommandPreparedStatementQuery is not implemented") + .toRuntimeException(); + } + /** * Returns data for a SQL query based data stream. * @param ticket Ticket message containing the statement handle. 
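The FlightSqlClient hunk above adds a GetSchema counterpart for each query and metadata call (getExecuteSchema, getCatalogsSchema, getTablesSchema, and so on), and the FlightSqlProducer hunk routes the corresponding commands, including CommandPreparedStatementQuery, through getSchema. As a rough orientation only, here is a minimal client-side sketch that is not part of this patch; the class name, endpoint, and query text are assumptions, with the port chosen to match the insecure listen address used by the FlightSqlExample.main added later in this diff.

    import org.apache.arrow.flight.FlightClient;
    import org.apache.arrow.flight.Location;
    import org.apache.arrow.flight.SchemaResult;
    import org.apache.arrow.flight.sql.FlightSqlClient;
    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;

    public class GetSchemaUsageSketch {
      public static void main(String[] args) throws Exception {
        // Assumed endpoint; any Flight SQL server works, e.g. the Derby-backed example server.
        Location location = Location.forGrpcInsecure("localhost", 55555);
        try (BufferAllocator allocator = new RootAllocator();
             FlightSqlClient client = new FlightSqlClient(
                 FlightClient.builder(allocator, location).build())) {
          // Result-set schema of an ad-hoc query, without executing it.
          SchemaResult querySchema = client.getExecuteSchema("SELECT 1 FROM SYSIBM.SYSDUMMY1");
          // Metadata stream schemas; these should match the FlightSqlProducer.Schemas constants.
          SchemaResult catalogsSchema = client.getCatalogsSchema();
          SchemaResult tablesSchema = client.getTablesSchema(/* includeSchema */ true);
          System.out.println(querySchema.getSchema());
          System.out.println(catalogsSchema.getSchema());
          System.out.println(tablesSchema.getSchema());
        }
      }
    }

Try-with-resources works here because this patch changes FlightSqlClient.close() to throw Exception rather than SQLException; on the server side, producers that do not override getSchemaPreparedStatement simply report UNIMPLEMENTED for CommandPreparedStatementQuery.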
diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlUtils.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlUtils.java index 25affa8f08aaa..e461515c40ecd 100644 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlUtils.java +++ b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlUtils.java @@ -76,7 +76,7 @@ public static T unpackOrThrow(Any source, Class as) { return source.unpack(as); } catch (final InvalidProtocolBufferException e) { throw CallStatus.INVALID_ARGUMENT - .withDescription("Provided message cannot be unpacked as desired type.") + .withDescription("Provided message cannot be unpacked as " + as.getName() + ": " + e) .withCause(e) .toRuntimeException(); } diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlExample.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlExample.java index cc8a12d884a95..d66b8df9283bf 100644 --- a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlExample.java +++ b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlExample.java @@ -41,7 +41,6 @@ import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; -import java.math.BigDecimal; import java.nio.ByteBuffer; import java.nio.channels.Channels; import java.nio.file.Files; @@ -50,16 +49,13 @@ import java.nio.file.Paths; import java.sql.Connection; import java.sql.DatabaseMetaData; -import java.sql.Date; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; +import java.sql.SQLSyntaxErrorException; import java.sql.Statement; -import java.sql.Time; -import java.sql.Timestamp; -import java.time.LocalDateTime; import java.util.ArrayList; import java.util.Arrays; import java.util.Calendar; @@ -71,7 +67,6 @@ import java.util.Objects; import java.util.Properties; import java.util.Set; -import java.util.TimeZone; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; @@ -82,12 +77,14 @@ import org.apache.arrow.adapter.jdbc.ArrowVectorIterator; import org.apache.arrow.adapter.jdbc.JdbcFieldInfo; +import org.apache.arrow.adapter.jdbc.JdbcParameterBinder; import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils; import org.apache.arrow.flight.CallStatus; import org.apache.arrow.flight.Criteria; import org.apache.arrow.flight.FlightDescriptor; import org.apache.arrow.flight.FlightEndpoint; import org.apache.arrow.flight.FlightInfo; +import org.apache.arrow.flight.FlightServer; import org.apache.arrow.flight.FlightStream; import org.apache.arrow.flight.Location; import org.apache.arrow.flight.PutResult; @@ -115,32 +112,10 @@ import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.util.AutoCloseables; import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.Decimal256Vector; -import org.apache.arrow.vector.DecimalVector; import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.LargeVarCharVector; -import 
org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import org.apache.arrow.vector.TimeStampMicroTZVector; -import org.apache.arrow.vector.TimeStampMilliTZVector; -import org.apache.arrow.vector.TimeStampNanoTZVector; -import org.apache.arrow.vector.TimeStampSecTZVector; -import org.apache.arrow.vector.TimeStampVector; -import org.apache.arrow.vector.TinyIntVector; import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorLoader; @@ -176,15 +151,8 @@ import com.google.protobuf.ProtocolStringList; /** - * Proof of concept {@link FlightSqlProducer} implementation showing an Apache Derby backed Flight SQL server capable - * of the following workflows: - * - * - returning a list of tables from the action `GetTables`. - * - creation of a prepared statement from the action `CreatePreparedStatement`. - * - execution of a prepared statement by using a {@link CommandPreparedStatementQuery} - * with {@link #getFlightInfo} and {@link #getStream}. + * Example {@link FlightSqlProducer} implementation showing an Apache Derby backed Flight SQL server that generally + * supports all current features of Flight SQL. */ public class FlightSqlExample implements FlightSqlProducer, AutoCloseable { private static final String DATABASE_URI = "jdbc:derby:target/derbyDB"; @@ -199,6 +167,17 @@ public class FlightSqlExample implements FlightSqlProducer, AutoCloseable { private final Cache> statementLoadingCache; private final SqlInfoBuilder sqlInfoBuilder; + public static void main(String[] args) throws Exception { + Location location = Location.forGrpcInsecure("localhost", 55555); + final FlightSqlExample example = new FlightSqlExample(location); + Location listenLocation = Location.forGrpcInsecure("0.0.0.0", 55555); + try (final BufferAllocator allocator = new RootAllocator(); + final FlightServer server = FlightServer.builder(allocator, listenLocation, example).build()) { + server.start(); + server.awaitTermination(); + } + } + public FlightSqlExample(final Location location) { // TODO Constructor should not be doing work. 
checkState( @@ -687,7 +666,7 @@ public void getStreamPreparedStatement(final CommandPreparedStatementQuery comma } } catch (final SQLException | IOException e) { LOGGER.error(format("Failed to getStreamPreparedStatement: <%s>.", e.getMessage()), e); - listener.error(e); + listener.error(CallStatus.INTERNAL.withDescription("Failed to prepare statement: " + e).toRuntimeException()); } finally { listener.completed(); } @@ -812,11 +791,16 @@ public void createPreparedStatement(final ActionCreatePreparedStatementRequest r .setPreparedStatementHandle(preparedStatementHandle) .build(); listener.onNext(new Result(pack(result).toByteArray())); + } catch (final SQLException e) { + listener.onError(CallStatus.INTERNAL + .withDescription("Failed to create prepared statement: " + e) + .toRuntimeException()); + return; } catch (final Throwable t) { - listener.onError(t); - } finally { - listener.onCompleted(); + listener.onError(CallStatus.INTERNAL.withDescription("Unknown error: " + t).toRuntimeException()); + return; } + listener.onCompleted(); }); } @@ -845,8 +829,14 @@ public Runnable acceptPutStatement(CommandStatementUpdate command, ackStream.onNext(PutResult.metadata(buffer)); ackStream.onCompleted(); } + } catch (SQLSyntaxErrorException e) { + ackStream.onError(CallStatus.INVALID_ARGUMENT + .withDescription("Failed to execute statement (invalid syntax): " + e) + .toRuntimeException()); } catch (SQLException e) { - ackStream.onError(e); + ackStream.onError(CallStatus.INTERNAL + .withDescription("Failed to execute statement: " + e) + .toRuntimeException()); } }; } @@ -858,7 +848,12 @@ public Runnable acceptPutPreparedStatementUpdate(CommandPreparedStatementUpdate preparedStatementLoadingCache.getIfPresent(command.getPreparedStatementHandle()); return () -> { - assert statement != null; + if (statement == null) { + ackStream.onError(CallStatus.NOT_FOUND + .withDescription("Prepared statement does not exist") + .toRuntimeException()); + return; + } try { final PreparedStatement preparedStatement = statement.getStatement(); @@ -872,9 +867,12 @@ public Runnable acceptPutPreparedStatementUpdate(CommandPreparedStatementUpdate preparedStatement.execute(); recordCount = preparedStatement.getUpdateCount(); } else { - setDataPreparedStatement(preparedStatement, root, true); - int[] recordCount1 = preparedStatement.executeBatch(); - recordCount = Arrays.stream(recordCount1).sum(); + final JdbcParameterBinder binder = JdbcParameterBinder.builder(preparedStatement, root).bindAll().build(); + while (binder.next()) { + preparedStatement.addBatch(); + } + int[] recordCounts = preparedStatement.executeBatch(); + recordCount = Arrays.stream(recordCounts).sum(); } final DoPutUpdateResult build = @@ -886,501 +884,13 @@ public Runnable acceptPutPreparedStatementUpdate(CommandPreparedStatementUpdate } } } catch (SQLException e) { - ackStream.onError(e); + ackStream.onError(CallStatus.INTERNAL.withDescription("Failed to execute update: " + e).toRuntimeException()); return; } ackStream.onCompleted(); }; } - /** - * Method responsible to set the parameters, to the preparedStatement object, sent via doPut request. - * - * @param preparedStatement the preparedStatement object for the operation. - * @param root a {@link VectorSchemaRoot} object contain the values to be used in the - * PreparedStatement setters. - * @param isUpdate a flag to indicate if is an update or query operation. - * @throws SQLException in case of error. 
- */ - private void setDataPreparedStatement(PreparedStatement preparedStatement, VectorSchemaRoot root, - boolean isUpdate) - throws SQLException { - for (int i = 0; i < root.getRowCount(); i++) { - for (FieldVector vector : root.getFieldVectors()) { - final int vectorPosition = root.getFieldVectors().indexOf(vector); - final int position = vectorPosition + 1; - - if (vector instanceof UInt1Vector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (UInt1Vector) vector); - } else if (vector instanceof TimeStampNanoTZVector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (TimeStampNanoTZVector) vector); - } else if (vector instanceof TimeStampMicroTZVector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (TimeStampMicroTZVector) vector); - } else if (vector instanceof TimeStampMilliTZVector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (TimeStampMilliTZVector) vector); - } else if (vector instanceof TimeStampSecTZVector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (TimeStampSecTZVector) vector); - } else if (vector instanceof UInt2Vector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (UInt2Vector) vector); - } else if (vector instanceof UInt4Vector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (UInt4Vector) vector); - } else if (vector instanceof UInt8Vector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (UInt8Vector) vector); - } else if (vector instanceof TinyIntVector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (TinyIntVector) vector); - } else if (vector instanceof SmallIntVector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (SmallIntVector) vector); - } else if (vector instanceof IntVector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (IntVector) vector); - } else if (vector instanceof BigIntVector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (BigIntVector) vector); - } else if (vector instanceof Float4Vector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (Float4Vector) vector); - } else if (vector instanceof Float8Vector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (Float8Vector) vector); - } else if (vector instanceof BitVector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (BitVector) vector); - } else if (vector instanceof DecimalVector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (DecimalVector) vector); - } else if (vector instanceof Decimal256Vector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (Decimal256Vector) vector); - } else if (vector instanceof TimeStampVector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (TimeStampVector) vector); - } else if (vector instanceof TimeNanoVector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (TimeNanoVector) vector); - } else if (vector instanceof TimeMicroVector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (TimeMicroVector) vector); - } else if (vector instanceof TimeMilliVector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (TimeMilliVector) vector); - } else if (vector instanceof TimeSecVector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (TimeSecVector) vector); 
- } else if (vector instanceof DateDayVector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (DateDayVector) vector); - } else if (vector instanceof DateMilliVector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (DateMilliVector) vector); - } else if (vector instanceof VarCharVector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (VarCharVector) vector); - } else if (vector instanceof LargeVarCharVector) { - setOnPreparedStatement(preparedStatement, position, vectorPosition, (LargeVarCharVector) vector); - } - } - if (isUpdate) { - preparedStatement.addBatch(); - } - } - } - - protected TimeZone getTimeZoneForVector(TimeStampVector vector) { - ArrowType.Timestamp arrowType = (ArrowType.Timestamp) vector.getField().getFieldType().getType(); - - String timezoneName = arrowType.getTimezone(); - if (timezoneName == null) { - return TimeZone.getDefault(); - } - - return TimeZone.getTimeZone(timezoneName); - } - - /** - * Set a string parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, VarCharVector vector) - throws SQLException { - final Text object = vector.getObject(vectorIndex); - statement.setObject(column, object.toString()); - } - - /** - * Set a string parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, - LargeVarCharVector vector) - throws SQLException { - final Text object = vector.getObject(vectorIndex); - statement.setObject(column, object); - } - - /** - * Set a byte parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, TinyIntVector vector) - throws SQLException { - final Byte object = vector.getObject(vectorIndex); - statement.setObject(column, object); - } - - /** - * Set a short parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. 
- */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, SmallIntVector vector) - throws SQLException { - final Short object = vector.getObject(vectorIndex); - statement.setObject(column, object); - } - - /** - * Set an integer parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, IntVector vector) - throws SQLException { - final Integer object = vector.getObject(vectorIndex); - statement.setObject(column, object); - } - - /** - * Set a long parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, BigIntVector vector) - throws SQLException { - final Long object = vector.getObject(vectorIndex); - statement.setObject(column, object); - } - - /** - * Set a float parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, Float4Vector vector) - throws SQLException { - final Float object = vector.getObject(vectorIndex); - statement.setObject(column, object); - } - - /** - * Set a double parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, Float8Vector vector) - throws SQLException { - final Double object = vector.getObject(vectorIndex); - statement.setObject(column, object); - } - - /** - * Set a BigDecimal parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, DecimalVector vector) - throws SQLException { - final BigDecimal object = vector.getObject(vectorIndex); - statement.setObject(column, object); - } - - /** - * Set a BigDecimal parameter to the preparedStatement object. 
- * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, Decimal256Vector vector) - throws SQLException { - final BigDecimal object = vector.getObject(vectorIndex); - statement.setObject(column, object); - } - - /** - * Set a timestamp parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, TimeStampVector vector) - throws SQLException { - final Object object = vector.getObject(vectorIndex); - statement.setObject(column, object); - } - - /** - * Set a time parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, TimeNanoVector vector) - throws SQLException { - final Long object = vector.getObject(vectorIndex); - statement.setTime(column, new Time(object * 1000L)); - } - - /** - * Set a time parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, TimeMicroVector vector) - throws SQLException { - final Long object = vector.getObject(vectorIndex); - statement.setTime(column, new Time(object / 1000L)); - } - - /** - * Set a time parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, TimeMilliVector vector) - throws SQLException { - final LocalDateTime object = vector.getObject(vectorIndex); - statement.setTime(column, Time.valueOf(object.toLocalTime())); - } - - /** - * Set a time parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. 
- * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, TimeSecVector vector) - throws SQLException { - final Integer object = vector.getObject(vectorIndex); - statement.setTime(column, new Time(object)); - } - - /** - * Set a date parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, DateDayVector vector) - throws SQLException { - final Integer object = vector.getObject(vectorIndex); - statement.setDate(column, new Date(TimeUnit.DAYS.toMillis(object))); - } - - /** - * Set a date parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, DateMilliVector vector) - throws SQLException { - final LocalDateTime object = vector.getObject(vectorIndex); - statement.setDate(column, Date.valueOf(object.toLocalDate())); - - } - - /** - * Set an unsigned 1 byte number parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, UInt1Vector vector) - throws SQLException { - final Byte object = vector.getObject(vectorIndex); - statement.setObject(column, object); - } - - /** - * Set an unsigned 2 bytes number parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, UInt2Vector vector) - throws SQLException { - final Character object = vector.getObject(vectorIndex); - statement.setObject(column, object); - } - - /** - * Set an unsigned 4 bytes number parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. 
- */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, UInt4Vector vector) - throws SQLException { - final Integer object = vector.getObject(vectorIndex); - statement.setObject(column, object); - } - - /** - * Set an unsigned 8 bytes number parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, UInt8Vector vector) - throws SQLException { - final Long object = vector.getObject(vectorIndex); - statement.setObject(column, object); - } - - /** - * Set a boolean parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, BitVector vector) - throws SQLException { - final Boolean object = vector.getObject(vectorIndex); - statement.setObject(column, object); - } - - /** - * Set a timestamp parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, - TimeStampNanoTZVector vector) - throws SQLException { - final Long object = vector.getObject(vectorIndex); - statement.setTimestamp(column, new Timestamp(object / 1000000L), - Calendar.getInstance(getTimeZoneForVector(vector))); - } - - /** - * Set a timestamp parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, - TimeStampMicroTZVector vector) - throws SQLException { - final Long object = vector.getObject(vectorIndex); - statement.setTimestamp(column, new Timestamp(object / 1000L), - Calendar.getInstance(getTimeZoneForVector(vector))); - } - - /** - * Set a timestamp parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. 
- */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, - TimeStampMilliTZVector vector) - throws SQLException { - final Long object = vector.getObject(vectorIndex); - statement.setTimestamp(column, new Timestamp(object), - Calendar.getInstance(getTimeZoneForVector(vector))); - } - - /** - * Set a timestamp parameter to the preparedStatement object. - * - * @param statement an instance of the {@link PreparedStatement} class. - * @param column the index of the column in the {@link PreparedStatement}. - * @param vectorIndex the index from the vector which contain the value. - * @param vector an instance of the vector the will be accessed. - * @throws SQLException in case of error. - */ - public void setOnPreparedStatement(PreparedStatement statement, int column, int vectorIndex, - TimeStampSecTZVector vector) - throws SQLException { - final Long object = vector.getObject(vectorIndex); - statement.setTimestamp(column, new Timestamp(object * 1000L), - Calendar.getInstance(getTimeZoneForVector(vector))); - } - @Override public Runnable acceptPutPreparedStatementQuery(CommandPreparedStatementQuery command, CallContext context, FlightStream flightStream, StreamListener ackStream) { @@ -1394,7 +904,10 @@ public Runnable acceptPutPreparedStatementQuery(CommandPreparedStatementQuery co try { while (flightStream.next()) { final VectorSchemaRoot root = flightStream.getRoot(); - setDataPreparedStatement(preparedStatement, root, false); + final JdbcParameterBinder binder = JdbcParameterBinder.builder(preparedStatement, root).bindAll().build(); + while (binder.next()) { + // Do not execute() - will be done in a getStream call + } } } catch (SQLException e) { diff --git a/java/flight/pom.xml b/java/flight/pom.xml index 01438a2416c90..d8b02bee7ab5c 100644 --- a/java/flight/pom.xml +++ b/java/flight/pom.xml @@ -15,7 +15,7 @@ arrow-java-root org.apache.arrow - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT 4.0.0 @@ -28,7 +28,7 @@ flight-core flight-grpc flight-sql - flight-jdbc-driver + flight-sql-jdbc-driver flight-integration-tests diff --git a/java/format/pom.xml b/java/format/pom.xml index 865d08e4e242e..c5f74d57fc29a 100644 --- a/java/format/pom.xml +++ b/java/format/pom.xml @@ -15,7 +15,7 @@ arrow-java-root org.apache.arrow - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT arrow-format diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml index 2d64b82b45bf9..3c6ba7b12c59e 100644 --- a/java/gandiva/pom.xml +++ b/java/gandiva/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT org.apache.arrow.gandiva diff --git a/java/memory/memory-core/pom.xml b/java/memory/memory-core/pom.xml index 1a0e7f1d13153..073a20470c2af 100644 --- a/java/memory/memory-core/pom.xml +++ b/java/memory/memory-core/pom.xml @@ -13,7 +13,7 @@ arrow-memory org.apache.arrow - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT 4.0.0 diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java index 16ef39702ca3e..c48ee66c2cc5d 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java @@ -133,7 +133,11 @@ public Object run() { } DIRECT_BUFFER_CONSTRUCTOR = directBufferConstructor; } catch (Throwable e) { - throw new RuntimeException("Failed to initialize MemoryUtil.", e); + // This exception will get swallowed, but it's necessary for the static analysis 
that ensures + // the static fields above get initialized + final RuntimeException failure = new RuntimeException("Failed to initialize MemoryUtil", e); + failure.printStackTrace(); + throw failure; } } diff --git a/java/memory/memory-netty/pom.xml b/java/memory/memory-netty/pom.xml index 3a8af2bb3b0dd..7f140e5caa562 100644 --- a/java/memory/memory-netty/pom.xml +++ b/java/memory/memory-netty/pom.xml @@ -13,7 +13,7 @@ arrow-memory org.apache.arrow - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT 4.0.0 diff --git a/java/memory/memory-netty/src/main/java/io/netty/buffer/UnsafeDirectLittleEndian.java b/java/memory/memory-netty/src/main/java/io/netty/buffer/UnsafeDirectLittleEndian.java index e900b1ca7adbe..e51c6c3d48882 100644 --- a/java/memory/memory-netty/src/main/java/io/netty/buffer/UnsafeDirectLittleEndian.java +++ b/java/memory/memory-netty/src/main/java/io/netty/buffer/UnsafeDirectLittleEndian.java @@ -31,16 +31,7 @@ * Netty classes and underlying Netty memory management. */ public class UnsafeDirectLittleEndian extends WrappedByteBuf { - - public static final boolean ASSERT_ENABLED; private static final AtomicLong ID_GENERATOR = new AtomicLong(0); - - static { - boolean isAssertEnabled = false; - assert isAssertEnabled = true; - ASSERT_ENABLED = isAssertEnabled; - } - public final long id = ID_GENERATOR.incrementAndGet(); private final AbstractByteBuf wrapped; private final long memoryAddress; diff --git a/java/memory/memory-unsafe/pom.xml b/java/memory/memory-unsafe/pom.xml index 5a8ce2a20301e..3e1c14b93bffb 100644 --- a/java/memory/memory-unsafe/pom.xml +++ b/java/memory/memory-unsafe/pom.xml @@ -13,7 +13,7 @@ arrow-memory org.apache.arrow - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT 4.0.0 diff --git a/java/memory/pom.xml b/java/memory/pom.xml index 7d39f44ac018a..cdbb3842f2b71 100644 --- a/java/memory/pom.xml +++ b/java/memory/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT arrow-memory Arrow Memory diff --git a/java/performance/pom.xml b/java/performance/pom.xml index 0b6aab8fac1d5..479d5e5ab17c1 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -14,7 +14,7 @@ arrow-java-root org.apache.arrow - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT arrow-performance jar @@ -74,7 +74,7 @@ org.apache.arrow arrow-algorithm - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT test diff --git a/java/plasma/pom.xml b/java/plasma/pom.xml index 0608128bc2e94..a9281d44e9a6e 100644 --- a/java/plasma/pom.xml +++ b/java/plasma/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT arrow-plasma Arrow Plasma Client diff --git a/java/pom.xml b/java/pom.xml index 10e0bddeec994..c1710f57b424e 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -20,7 +20,7 @@ org.apache.arrow arrow-java-root - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT pom Apache Arrow Java Root POM @@ -252,7 +252,7 @@ pl.project13.maven git-commit-id-plugin - 2.2.2 + 4.0.5 for-jars @@ -354,6 +354,7 @@ javax.annotation:javax.annotation-api:* + org.apache.hadoop:hadoop-client-api @@ -398,7 +399,7 @@ maven-surefire-plugin - 3.0.0-M3 + 3.0.0-M7 true true @@ -786,7 +787,7 @@ error-prone-jdk11+ - [11,) + [11,] !m2e.version @@ -802,10 +803,7 @@ UTF-8 -XDcompilePolicy=simple - - -Xplugin:ErrorProne \ - -XepExcludedPaths:.*/(target/generated-sources)/.* - + -Xplugin:ErrorProne -XepExcludedPaths:.*/(target/generated-sources)/.* -J--add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED -J--add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED 
-J--add-exports=jdk.compiler/com.sun.tools.javac.main=ALL-UNNAMED @@ -888,6 +886,26 @@ + + windows + + [17,] + + windows + + + + + + maven-surefire-plugin + + false + + + + + + diff --git a/java/tools/pom.xml b/java/tools/pom.xml index a46548e7db143..3925870538f2a 100644 --- a/java/tools/pom.xml +++ b/java/tools/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT arrow-tools Arrow Tools diff --git a/java/vector/pom.xml b/java/vector/pom.xml index 8bf184a4a58e4..dbb0a533ef9a3 100644 --- a/java/vector/pom.xml +++ b/java/vector/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 9.0.0-SNAPSHOT + 10.0.0-SNAPSHOT arrow-vector Arrow Vectors diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java index 866dd9e218fc1..2a89590bf8440 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java @@ -46,6 +46,7 @@ public abstract class BaseVariableWidthVector extends BaseValueVector implements VariableWidthVector, FieldVector, VectorDefinitionSetter { private static final int DEFAULT_RECORD_BYTE_COUNT = 8; private static final int INITIAL_BYTE_COUNT = INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT; + private static final int MAX_BUFFER_SIZE = (int) Math.min(MAX_ALLOCATION_SIZE, Integer.MAX_VALUE); private int lastValueCapacity; private long lastValueAllocationSizeInBytes; @@ -430,9 +431,10 @@ public void allocateNew(int valueCount) { /* Check if the data buffer size is within bounds. */ private void checkDataBufferSize(long size) { - if (size > MAX_ALLOCATION_SIZE || size < 0) { + if (size > MAX_BUFFER_SIZE || size < 0) { throw new OversizedAllocationException("Memory required for vector " + - " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")"); + "is (" + size + "), which overflows or is more than the max allowed (" + MAX_BUFFER_SIZE + "). " + + "You could consider using LargeVarCharVector/LargeVarBinaryVector for large string/large binary types"); } } @@ -445,10 +447,10 @@ private long computeAndCheckOffsetsBufferSize(int valueCount) { * an additional slot in offset buffer. */ final long size = computeCombinedBufferSize(valueCount + 1, OFFSET_WIDTH); - if (size > MAX_ALLOCATION_SIZE) { + if (size > MAX_BUFFER_SIZE) { throw new OversizedAllocationException("Memory required for vector capacity " + valueCount + - " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")"); + " is (" + size + "), which is more than max allowed (" + MAX_BUFFER_SIZE + ")"); } return size; } @@ -514,13 +516,33 @@ public void reallocDataBuffer() { newAllocationSize = INITIAL_BYTE_COUNT * 2L; } } - newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); + + reallocDataBuffer(newAllocationSize); + } + + /** + * Reallocate the data buffer to the given size. The data buffer stores the actual data for + * VARCHAR or VARBINARY elements in the vector. The actual allocated size may be larger + * than the requested one because the provided value is rounded up to the nearest + * power of two.
+ * + * @param desiredAllocSize the desired new allocation size + * @throws OversizedAllocationException if the desired new size is more than + * max allowed + * @throws OutOfMemoryException if the internal memory allocation fails + */ + public void reallocDataBuffer(long desiredAllocSize) { + if (desiredAllocSize == 0) { + return; + } + + final long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize); assert newAllocationSize >= 1; checkDataBufferSize(newAllocationSize); final ArrowBuf newBuf = allocator.buffer(newAllocationSize); - newBuf.setBytes(0, valueBuffer, 0, currentBufferCapacity); + newBuf.setBytes(0, valueBuffer, 0, valueBuffer.capacity()); valueBuffer.getReferenceManager().release(); valueBuffer = newBuf; lastValueAllocationSizeInBytes = valueBuffer.capacity(); @@ -1250,9 +1272,10 @@ protected final void handleSafe(int index, int dataLength) { while (index >= getValueCapacity()) { reallocValidityAndOffsetBuffers(); } - final int startOffset = lastSet < 0 ? 0 : getStartOffset(lastSet + 1); - while (valueBuffer.capacity() < (startOffset + dataLength)) { - reallocDataBuffer(); + final long startOffset = lastSet < 0 ? 0 : getStartOffset(lastSet + 1); + final long targetCapacity = startOffset + dataLength; + if (valueBuffer.capacity() < targetCapacity) { + reallocDataBuffer(targetCapacity); } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index d8fe72a7074b1..0fa091fb0cede 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -76,13 +76,13 @@ public static ListVector empty(String name, BufferAllocator allocator) { protected ArrowBuf validityBuffer; protected UnionListReader reader; private CallBack callBack; - private final FieldType fieldType; - private int validityAllocationSizeInBytes; + protected final FieldType fieldType; + protected int validityAllocationSizeInBytes; /** * The maximum index that is actually set. */ - private int lastSet; + protected int lastSet; /** * Constructs a new instance. 
@@ -276,7 +276,7 @@ public boolean allocateNewSafe() { return true; } - private void allocateValidityBuffer(final long size) { + protected void allocateValidityBuffer(final long size) { final int curSize = (int) size; validityBuffer = allocator.buffer(curSize); validityBuffer.readerIndex(0); @@ -296,7 +296,7 @@ public void reAlloc() { super.reAlloc(); } - private void reallocValidityAndOffsetBuffers() { + protected void reallocValidityAndOffsetBuffers() { reallocOffsetBuffer(); reallocValidityBuffer(); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index 14cba0926e193..b8f3f32a73a29 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -22,8 +22,12 @@ import java.util.List; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.AddOrGetResult; +import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.complex.impl.UnionMapReader; import org.apache.arrow.vector.complex.impl.UnionMapWriter; import org.apache.arrow.vector.types.Types; @@ -32,6 +36,7 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.TransferPair; /** * A MapVector is used to store entries of key/value pairs. It is a container vector that is @@ -119,4 +124,151 @@ public UnionMapReader getReader() { public MinorType getMinorType() { return MinorType.MAP; } + + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator) { + return getTransferPair(ref, allocator, null); + } + + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { + return new TransferImpl(ref, allocator, callBack); + } + + @Override + public TransferPair makeTransferPair(ValueVector target) { + return new MapVector.TransferImpl((MapVector) target); + } + + private class TransferImpl implements TransferPair { + + MapVector to; + TransferPair dataTransferPair; + + public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { + this(new MapVector(name, allocator, fieldType, callBack)); + } + + public TransferImpl(MapVector to) { + this.to = to; + to.addOrGetVector(vector.getField().getFieldType()); + if (to.getDataVector() instanceof ZeroVector) { + to.addOrGetVector(vector.getField().getFieldType()); + } + dataTransferPair = getDataVector().makeTransferPair(to.getDataVector()); + } + + /** + * Transfer this vector's data to another vector. The memory associated + * with this vector is transferred to the allocator of the target vector + * for accounting and management purposes. + */ + @Override + public void transfer() { + to.clear(); + dataTransferPair.transfer(); + to.validityBuffer = transferBuffer(validityBuffer, to.allocator); + to.offsetBuffer = transferBuffer(offsetBuffer, to.allocator); + to.lastSet = lastSet; + if (valueCount > 0) { + to.setValueCount(valueCount); + } + clear(); + } + + /** + * Slice this vector at the desired index and length and transfer the + * corresponding data to the target vector. + * @param startIndex start position of the split in the source vector.
+ * @param length length of the split. + */ + @Override + public void splitAndTransfer(int startIndex, int length) { + Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, + "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount); + final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH); + final int sliceLength = offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint; + to.clear(); + to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH); + /* splitAndTransfer offset buffer */ + for (int i = 0; i < length + 1; i++) { + final int relativeOffset = offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint; + to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset); + } + /* splitAndTransfer validity buffer */ + splitAndTransferValidityBuffer(startIndex, length, to); + /* splitAndTransfer data buffer */ + dataTransferPair.splitAndTransfer(startPoint, sliceLength); + to.lastSet = length - 1; + to.setValueCount(length); + } + + /* + * transfer the validity. + */ + private void splitAndTransferValidityBuffer(int startIndex, int length, MapVector target) { + int firstByteSource = BitVectorHelper.byteIndex(startIndex); + int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); + int byteSizeTarget = getValidityBufferSizeFromCount(length); + int offset = startIndex % 8; + + if (length > 0) { + if (offset == 0) { + // slice + if (target.validityBuffer != null) { + target.validityBuffer.getReferenceManager().release(); + } + target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); + target.validityBuffer.getReferenceManager().retain(1); + } else { + /* Copy data + * When the first bit starts from the middle of a byte (offset != 0), + * copy data from src BitVector. + * Each byte in the target is composed by a part in i-th byte, + * another part in (i+1)-th byte. + */ + target.allocateValidityBuffer(byteSizeTarget); + + for (int i = 0; i < byteSizeTarget - 1; i++) { + byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset); + byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer, firstByteSource + i + 1, offset); + + target.validityBuffer.setByte(i, (b1 + b2)); + } + + /* Copying the last piece is done in the following manner: + * if the source vector has 1 or more bytes remaining, we copy + * the last piece as a byte formed by shifting data + * from the current byte and the next byte. + * + * if the source vector has no more bytes remaining + * (we are at the last byte), we copy the last piece as a byte + * by shifting data from the current byte. 
+ */ + if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { + byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer, + firstByteSource + byteSizeTarget - 1, offset); + byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer, + firstByteSource + byteSizeTarget, offset); + + target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); + } else { + byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer, + firstByteSource + byteSizeTarget - 1, offset); + target.validityBuffer.setByte(byteSizeTarget - 1, b1); + } + } + } + } + + @Override + public ValueVector getTo() { + return to; + } + + @Override + public void copyValueSafe(int from, int to) { + this.to.copyFrom(from, to, MapVector.this); + } + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java b/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java index 41ce1fc0d10aa..3612d677ed5a9 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java @@ -20,7 +20,7 @@ import java.util.AbstractMap; import java.util.ArrayList; import java.util.Collection; -import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -54,7 +54,7 @@ public class MapWithOrdinalImpl implements MapWithOrdinal { private static final Logger logger = LoggerFactory.getLogger(MapWithOrdinalImpl.class); - private final Map> primary = new HashMap<>(); + private final Map> primary = new LinkedHashMap<>(); private final IntObjectHashMap secondary = new IntObjectHashMap<>(); private final Map delegate = new Map() { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java index 9637021dbdad8..d60d5611a5f7b 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import java.util.ArrayList; @@ -1110,4 +1111,26 @@ public void testClearAndReuse() { assertEquals(55, getResultValue(resultStruct)); } } + + @Test + public void testGetTransferPair() { + try (MapVector mapVector = MapVector.empty("mapVector", allocator, false)) { + + FieldType type = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); + AddOrGetResult addResult = mapVector.addOrGetVector(type); + FieldType keyType = new FieldType(false, MinorType.BIGINT.getType(), null, null); + FieldType valueType = FieldType.nullable(MinorType.FLOAT8.getType()); + addResult.getVector().addOrGet(MapVector.KEY_NAME, keyType, BigIntVector.class); + addResult.getVector().addOrGet(MapVector.VALUE_NAME, valueType, Float8Vector.class); + mapVector.allocateNew(); + mapVector.setValueCount(0); + + assertEquals(-1, mapVector.getLastSet()); + TransferPair tp = mapVector.getTransferPair(mapVector.getName(), allocator, null); + tp.transfer(); + ValueVector vector = tp.getTo(); + assertSame(vector.getClass(), mapVector.getClass()); + vector.clear(); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java index 
e60b87e601974..716fa0bde454d 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java @@ -29,8 +29,10 @@ import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.UnionVector; +import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.Struct; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -406,5 +408,25 @@ public void testStructVectorZeroStartIndexAndLength() { } } + @Test + public void testMapVectorZeroStartIndexAndLength() { + Map metadata = new HashMap<>(); + metadata.put("k1", "v1"); + FieldType type = new FieldType(true, new ArrowType.Map(false), null, metadata); + try (final MapVector mapVector = new MapVector("mapVec", allocator, type, null); + final MapVector newMapVector = new MapVector("newMapVec", allocator, type, null)) { + + mapVector.allocateNew(); + final int valueCount = 0; + mapVector.setValueCount(valueCount); + + final TransferPair tp = mapVector.makeTransferPair(newMapVector); + + tp.splitAndTransfer(0, 0); + assertEquals(valueCount, newMapVector.getValueCount()); + + newMapVector.clear(); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java index 734ff46311598..b4c30480000c8 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java @@ -19,17 +19,20 @@ import static org.junit.Assert.*; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.complex.AbstractStructVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.UnionVector; import org.apache.arrow.vector.holders.ComplexHolder; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType.Struct; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.junit.After; import org.junit.Assert; @@ -171,13 +174,112 @@ public void testAddOrGetComplexChildVectors() { vector.addOrGetStruct("struct"); vector.addOrGetMap("map", true); - List childrens = vector.getChildrenFromFields(); - assertEquals(5, childrens.size()); - assertEquals(MinorType.LIST, childrens.get(0).getMinorType()); - assertEquals(MinorType.FIXED_SIZE_LIST, childrens.get(1).getMinorType()); - assertEquals(MinorType.UNION, childrens.get(2).getMinorType()); - assertEquals(MinorType.STRUCT, childrens.get(3).getMinorType()); - assertEquals(MinorType.MAP, childrens.get(4).getMinorType()); + List children = vector.getChildrenFromFields(); + assertEquals(5, children.size()); + assertEquals(MinorType.LIST, children.get(0).getMinorType()); + assertEquals(MinorType.FIXED_SIZE_LIST, children.get(1).getMinorType()); + assertEquals(MinorType.UNION, children.get(2).getMinorType()); + assertEquals(MinorType.STRUCT, children.get(3).getMinorType()); + assertEquals(MinorType.MAP, 
children.get(4).getMinorType()); } } + + @Test + public void testAddChildVectorsWithDuplicatedFieldNamesForConflictPolicyAppend() { + final FieldType type = new FieldType(true, Struct.INSTANCE, null, null); + try (StructVector vector = new StructVector("struct", allocator, type, null, + AbstractStructVector.ConflictPolicy.CONFLICT_APPEND, true)) { + final List initFields = new ArrayList<>(); + + // Add a bit more fields to test against stability of the internal field + // ordering mechanism of StructVector + initFields.add(Field.nullable("varchar1", MinorType.VARCHAR.getType())); + initFields.add(Field.nullable("int1", MinorType.INT.getType())); + initFields.add(Field.nullable("varchar2", MinorType.VARCHAR.getType())); + initFields.add(Field.nullable("int2", MinorType.INT.getType())); + initFields.add(Field.nullable("varchar3", MinorType.VARCHAR.getType())); + initFields.add(Field.nullable("int3", MinorType.INT.getType())); + initFields.add(Field.nullable("uncertain-type", MinorType.INT.getType())); + + // To ensure duplicated field names don't mess up the original field order + // in the struct vector + initFields.add(Field.nullable("varchar1", MinorType.VARCHAR.getType())); + initFields.add(Field.nullable("varchar2", MinorType.VARCHAR.getType())); + initFields.add(Field.nullable("varchar3", MinorType.VARCHAR.getType())); + initFields.add(Field.nullable("uncertain-type", MinorType.VARCHAR.getType())); + + vector.initializeChildrenFromFields(initFields); + + List children = vector.getChildrenFromFields(); + assertEquals(11, children.size()); + assertEquals("varchar1", children.get(0).getName()); + assertEquals("int1", children.get(1).getName()); + assertEquals("varchar2", children.get(2).getName()); + assertEquals("int2", children.get(3).getName()); + assertEquals("varchar3", children.get(4).getName()); + assertEquals("int3", children.get(5).getName()); + assertEquals("uncertain-type", children.get(6).getName()); + assertEquals("varchar1", children.get(7).getName()); + assertEquals("varchar2", children.get(8).getName()); + assertEquals("varchar3", children.get(9).getName()); + assertEquals("uncertain-type", children.get(10).getName()); + assertEquals(MinorType.VARCHAR, children.get(0).getMinorType()); + assertEquals(MinorType.INT, children.get(1).getMinorType()); + assertEquals(MinorType.VARCHAR, children.get(2).getMinorType()); + assertEquals(MinorType.INT, children.get(3).getMinorType()); + assertEquals(MinorType.VARCHAR, children.get(4).getMinorType()); + assertEquals(MinorType.INT, children.get(5).getMinorType()); + assertEquals(MinorType.INT, children.get(6).getMinorType()); + assertEquals(MinorType.VARCHAR, children.get(7).getMinorType()); + assertEquals(MinorType.VARCHAR, children.get(8).getMinorType()); + assertEquals(MinorType.VARCHAR, children.get(9).getMinorType()); + assertEquals(MinorType.VARCHAR, children.get(10).getMinorType()); + } + } + + @Test + public void testAddChildVectorsWithDuplicatedFieldNamesForConflictPolicyReplace() { + final FieldType type = new FieldType(true, Struct.INSTANCE, null, null); + try (StructVector vector = new StructVector("struct", allocator, type, null, + AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE, true)) { + final List initFields = new ArrayList<>(); + + // Add a bit more fields to test against stability of the internal field + // ordering mechanism of StructVector + initFields.add(Field.nullable("varchar1", MinorType.VARCHAR.getType())); + initFields.add(Field.nullable("int1", MinorType.INT.getType())); + 
initFields.add(Field.nullable("varchar2", MinorType.VARCHAR.getType())); + initFields.add(Field.nullable("int2", MinorType.INT.getType())); + initFields.add(Field.nullable("varchar3", MinorType.VARCHAR.getType())); + initFields.add(Field.nullable("int3", MinorType.INT.getType())); + initFields.add(Field.nullable("uncertain-type", MinorType.INT.getType())); + + // To ensure duplicated field names don't mess up the original field order + // in the struct vector + initFields.add(Field.nullable("varchar1", MinorType.VARCHAR.getType())); + initFields.add(Field.nullable("varchar2", MinorType.VARCHAR.getType())); + initFields.add(Field.nullable("varchar3", MinorType.VARCHAR.getType())); + initFields.add(Field.nullable("uncertain-type", MinorType.VARCHAR.getType())); + + vector.initializeChildrenFromFields(initFields); + + List children = vector.getChildrenFromFields(); + assertEquals(7, children.size()); + assertEquals("varchar1", children.get(0).getName()); + assertEquals("int1", children.get(1).getName()); + assertEquals("varchar2", children.get(2).getName()); + assertEquals("int2", children.get(3).getName()); + assertEquals("varchar3", children.get(4).getName()); + assertEquals("int3", children.get(5).getName()); + assertEquals("uncertain-type", children.get(6).getName()); + assertEquals(MinorType.VARCHAR, children.get(0).getMinorType()); + assertEquals(MinorType.INT, children.get(1).getMinorType()); + assertEquals(MinorType.VARCHAR, children.get(2).getMinorType()); + assertEquals(MinorType.INT, children.get(3).getMinorType()); + assertEquals(MinorType.VARCHAR, children.get(4).getMinorType()); + assertEquals(MinorType.INT, children.get(5).getMinorType()); + assertEquals(MinorType.VARCHAR, children.get(6).getMinorType()); + } + } + } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 516daa2362280..0928d3eb03082 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -1137,6 +1137,25 @@ public void testNullableVarType2() { } } + @Test(expected = OversizedAllocationException.class) + public void testReallocateCheckSuccess() { + + // Create a new value vector for 1024 integers. + try (final VarBinaryVector vector = newVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + vector.allocateNew(1024 * 10, 1024); + + vector.set(0, STR1); + // Check the sample strings. 
+ assertArrayEquals(STR1, vector.get(0)); + + // update the index offset to a larger one + ArrowBuf offsetBuf = vector.getOffsetBuffer(); + offsetBuf.setInt(VarBinaryVector.OFFSET_WIDTH, Integer.MAX_VALUE - 5); + + vector.setValueLengthSafe(1, 6); + } + } + /* * generic tests diff --git a/js/.eslintrc.cjs b/js/.eslintrc.cjs index 937a0958a8381..08de2b3e054e3 100644 --- a/js/.eslintrc.cjs +++ b/js/.eslintrc.cjs @@ -100,6 +100,7 @@ module.exports = { "unicorn/prefer-switch": "off", "unicorn/prefer-node-protocol": "off", "unicorn/text-encoding-identifier-case": "off", + "unicorn/prefer-top-level-await": "off", "unicorn/consistent-destructuring": "warn", "unicorn/no-array-reduce": ["warn", { "allowSimpleOperations": true }], diff --git a/js/gulp/arrow-task.js b/js/gulp/arrow-task.js index ba349cc3b4e4d..a17087032e243 100644 --- a/js/gulp/arrow-task.js +++ b/js/gulp/arrow-task.js @@ -17,7 +17,7 @@ import { targetDir, observableFromStreams } from './util.js'; -import del from 'del'; +import { deleteAsync as del } from 'del'; import gulp from 'gulp'; import mkdirp from 'mkdirp'; import gulpRename from 'gulp-rename'; diff --git a/js/gulp/clean-task.js b/js/gulp/clean-task.js index 02da908381a73..9c9bae519d0d3 100644 --- a/js/gulp/clean-task.js +++ b/js/gulp/clean-task.js @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -import del from 'del'; +import { deleteAsync as del } from 'del'; import { targetDir } from './util.js'; import memoizeTask from './memoize-task.js'; import { catchError } from 'rxjs/operators'; diff --git a/js/gulp/test-task.js b/js/gulp/test-task.js index 954a1c1e339be..016d33892c8f2 100644 --- a/js/gulp/test-task.js +++ b/js/gulp/test-task.js @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -import del from 'del'; +import { deleteAsync as del } from 'del'; import path from 'path'; import mkdirp from 'mkdirp'; import { argv } from './argv.js'; diff --git a/js/gulpfile.js b/js/gulpfile.js index bf3de1acbf1fb..6544b987b73f6 100644 --- a/js/gulpfile.js +++ b/js/gulpfile.js @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-import del from "del"; +import { deleteAsync as del } from 'del'; import os from "os"; import gulp from "gulp"; import { targets } from "./gulp/argv.js"; diff --git a/js/package.json b/js/package.json index 80e581dd8a85a..1fb8c4db6105a 100644 --- a/js/package.json +++ b/js/package.json @@ -54,7 +54,7 @@ "dependencies": { "@types/command-line-args": "5.2.0", "@types/command-line-usage": "5.0.2", - "@types/node": "^17.0.36", + "@types/node": "^18.6.4", "@types/pad-left": "2.1.1", "command-line-args": "5.2.1", "command-line-usage": "6.1.3", @@ -64,30 +64,30 @@ "tslib": "^2.4.0" }, "devDependencies": { - "@openpgp/web-stream-tools": "0.0.10", + "@openpgp/web-stream-tools": "0.0.11", "@rollup/plugin-alias": "3.1.9", "@rollup/plugin-node-resolve": "13.3.0", "@rollup/stream": "2.0.0", "@types/benchmark": "2.1.1", "@types/glob": "7.2.0", - "@types/jest": "27.5.1", + "@types/jest": "28.1.6", "@types/randomatic": "3.1.3", - "@typescript-eslint/eslint-plugin": "5.27.0", - "@typescript-eslint/parser": "5.27.0", - "async-done": "1.3.2", + "@typescript-eslint/eslint-plugin": "5.32.0", + "@typescript-eslint/parser": "5.32.0", + "async-done": "2.0.0", "benny": "3.7.1", "cross-env": "7.0.3", - "del-cli": "4.0.1", - "esbuild": "0.14.42", + "del-cli": "5.0.0", + "esbuild": "0.14.53", "esbuild-plugin-alias": "0.2.1", - "eslint": "8.16.0", - "eslint-plugin-jest": "26.4.6", - "eslint-plugin-unicorn": "42.0.0", + "eslint": "8.21.0", + "eslint-plugin-jest": "26.7.0", + "eslint-plugin-unicorn": "43.0.2", "esm": "https://github.com/jsg2021/esm/releases/download/v3.x.x-pr883/esm-3.x.x-pr883.tgz", "glob": "8.0.3", - "google-closure-compiler": "20220502.0.0", + "google-closure-compiler": "20220719.0.0", "gulp": "4.0.2", - "gulp-esbuild": "0.10.3", + "gulp-esbuild": "0.10.4", "gulp-json-transform": "0.4.8", "gulp-rename": "2.0.0", "gulp-replace": "1.1.3", @@ -95,25 +95,25 @@ "gulp-terser": "2.1.0", "gulp-typescript": "5.0.1", "gulp-vinyl-size": "1.1.3", - "ix": "4.5.2", - "jest": "28.1.0", + "ix": "5.0.0", + "jest": "28.1.3", "jest-silent-reporter": "0.5.0", - "lerna": "5.0.0", - "memfs": "3.4.4", + "lerna": "5.3.0", + "memfs": "3.4.7", "mkdirp": "1.0.4", "multistream": "4.1.0", "randomatic": "3.1.1", - "rollup": "2.75.4", - "rxjs": "7.5.5", - "ts-jest": "28.0.3", - "ts-node": "10.8.0", - "typedoc": "0.22.16", - "typescript": "4.7.2", + "rollup": "2.77.2", + "rxjs": "7.5.6", + "ts-jest": "28.0.7", + "ts-node": "10.9.1", + "typedoc": "0.23.10", + "typescript": "4.7.4", "vinyl-buffer": "1.0.1", "vinyl-named": "1.1.0", "vinyl-source-stream": "2.0.0", "web-streams-polyfill": "3.2.1", - "webpack": "5.72.1", + "webpack": "5.74.0", "webpack-bundle-analyzer": "4.5.0", "webpack-stream": "7.0.0", "xml2js": "0.4.23" @@ -121,5 +121,5 @@ "engines": { "node": ">=12.0" }, - "version": "9.0.0-SNAPSHOT" + "version": "10.0.0-SNAPSHOT" } diff --git a/js/src/util/bn.ts b/js/src/util/bn.ts index c10d7d3c4673c..e83443a2423b0 100644 --- a/js/src/util/bn.ts +++ b/js/src/util/bn.ts @@ -119,7 +119,7 @@ function decimalToString>(a: T) { base64[0] = base64[0] - base64[1] * 10; digits = `${base64[0]}${digits}`; } while (checks[0] || checks[1] || checks[2] || checks[3]); - return digits ? digits : `0`; + return digits ?? 
`0`; } /** @ignore */ diff --git a/js/typedoc.json b/js/typedoc.json index 74ee29ccf8ff0..2780bd3d4f4d7 100644 --- a/js/typedoc.json +++ b/js/typedoc.json @@ -9,6 +9,7 @@ "includeVersion": true, "exclude": [ "src/fb/*.ts", - "src/bin/*.ts" + "src/bin/*.ts", + "src/ipc/metadata/message.ts" ] } diff --git a/js/yarn.lock b/js/yarn.lock index fbe8f82b51021..87e46f24831b9 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -44,150 +44,155 @@ "@arrows/error" "^1.0.2" fast-deep-equal "^3.1.3" -"@babel/code-frame@^7.0.0", "@babel/code-frame@^7.12.13", "@babel/code-frame@^7.16.7": - version "7.16.7" - resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.16.7.tgz#44416b6bd7624b998f5b1af5d470856c40138789" - integrity sha512-iAXqUn8IIeBTNd72xsFlgaXHkMBMt6y4HJp1tIaK465CWLT/fG1aqB7ykr95gHHmlBdGbFeWWfyB4NJJ0nmeIg== +"@babel/code-frame@^7.0.0", "@babel/code-frame@^7.12.13", "@babel/code-frame@^7.18.6": + version "7.18.6" + resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.18.6.tgz#3b25d38c89600baa2dcc219edfa88a74eb2c427a" + integrity sha512-TDCmlK5eOvH+eH7cdAFlNXeVJqWIQ7gW9tY1GJIpUtFb6CmjVyq2VM3u71bOyR8CRihcCgMUYoDNyLXao3+70Q== dependencies: - "@babel/highlight" "^7.16.7" + "@babel/highlight" "^7.18.6" -"@babel/compat-data@^7.17.10": - version "7.17.10" - resolved "https://registry.yarnpkg.com/@babel/compat-data/-/compat-data-7.17.10.tgz#711dc726a492dfc8be8220028b1b92482362baab" - integrity sha512-GZt/TCsG70Ms19gfZO1tM4CVnXsPgEPBCpJu+Qz3L0LUDsY5nZqFZglIoPC1kIYOtNBZlrnFT+klg12vFGZXrw== +"@babel/compat-data@^7.18.8": + version "7.18.8" + resolved "https://registry.yarnpkg.com/@babel/compat-data/-/compat-data-7.18.8.tgz#2483f565faca607b8535590e84e7de323f27764d" + integrity sha512-HSmX4WZPPK3FUxYp7g2T6EyO8j96HlZJlxmKPSh6KAcqwyDrfx7hKjXpAW/0FhFfTJsR0Yt4lAjLI2coMptIHQ== "@babel/core@^7.11.6", "@babel/core@^7.12.3": - version "7.18.2" - resolved "https://registry.yarnpkg.com/@babel/core/-/core-7.18.2.tgz#87b2fcd7cce9becaa7f5acebdc4f09f3dd19d876" - integrity sha512-A8pri1YJiC5UnkdrWcmfZTJTV85b4UXTAfImGmCfYmax4TR9Cw8sDS0MOk++Gp2mE/BefVJ5nwy5yzqNJbP/DQ== + version "7.18.10" + resolved "https://registry.yarnpkg.com/@babel/core/-/core-7.18.10.tgz#39ad504991d77f1f3da91be0b8b949a5bc466fb8" + integrity sha512-JQM6k6ENcBFKVtWvLavlvi/mPcpYZ3+R+2EySDEMSMbp7Mn4FexlbbJVrx2R7Ijhr01T8gyqrOaABWIOgxeUyw== dependencies: "@ampproject/remapping" "^2.1.0" - "@babel/code-frame" "^7.16.7" - "@babel/generator" "^7.18.2" - "@babel/helper-compilation-targets" "^7.18.2" - "@babel/helper-module-transforms" "^7.18.0" - "@babel/helpers" "^7.18.2" - "@babel/parser" "^7.18.0" - "@babel/template" "^7.16.7" - "@babel/traverse" "^7.18.2" - "@babel/types" "^7.18.2" + "@babel/code-frame" "^7.18.6" + "@babel/generator" "^7.18.10" + "@babel/helper-compilation-targets" "^7.18.9" + "@babel/helper-module-transforms" "^7.18.9" + "@babel/helpers" "^7.18.9" + "@babel/parser" "^7.18.10" + "@babel/template" "^7.18.10" + "@babel/traverse" "^7.18.10" + "@babel/types" "^7.18.10" convert-source-map "^1.7.0" debug "^4.1.0" gensync "^1.0.0-beta.2" json5 "^2.2.1" semver "^6.3.0" -"@babel/generator@^7.18.2", "@babel/generator@^7.7.2": - version "7.18.2" - resolved "https://registry.yarnpkg.com/@babel/generator/-/generator-7.18.2.tgz#33873d6f89b21efe2da63fe554460f3df1c5880d" - integrity sha512-W1lG5vUwFvfMd8HVXqdfbuG7RuaSrTCCD8cl8fP8wOivdbtbIg2Db3IWUcgvfxKbbn6ZBGYRW/Zk1MIwK49mgw== +"@babel/generator@^7.18.10", "@babel/generator@^7.7.2": + version "7.18.12" + resolved 
"https://registry.yarnpkg.com/@babel/generator/-/generator-7.18.12.tgz#fa58daa303757bd6f5e4bbca91b342040463d9f4" + integrity sha512-dfQ8ebCN98SvyL7IxNMCUtZQSq5R7kxgN+r8qYTGDmmSion1hX2C0zq2yo1bsCDhXixokv1SAWTZUMYbO/V5zg== dependencies: - "@babel/types" "^7.18.2" - "@jridgewell/gen-mapping" "^0.3.0" + "@babel/types" "^7.18.10" + "@jridgewell/gen-mapping" "^0.3.2" jsesc "^2.5.1" -"@babel/helper-compilation-targets@^7.18.2": - version "7.18.2" - resolved "https://registry.yarnpkg.com/@babel/helper-compilation-targets/-/helper-compilation-targets-7.18.2.tgz#67a85a10cbd5fc7f1457fec2e7f45441dc6c754b" - integrity sha512-s1jnPotJS9uQnzFtiZVBUxe67CuBa679oWFHpxYYnTpRL/1ffhyX44R9uYiXoa/pLXcY9H2moJta0iaanlk/rQ== +"@babel/helper-compilation-targets@^7.18.9": + version "7.18.9" + resolved "https://registry.yarnpkg.com/@babel/helper-compilation-targets/-/helper-compilation-targets-7.18.9.tgz#69e64f57b524cde3e5ff6cc5a9f4a387ee5563bf" + integrity sha512-tzLCyVmqUiFlcFoAPLA/gL9TeYrF61VLNtb+hvkuVaB5SUjW7jcfrglBIX1vUIoT7CLP3bBlIMeyEsIl2eFQNg== dependencies: - "@babel/compat-data" "^7.17.10" - "@babel/helper-validator-option" "^7.16.7" + "@babel/compat-data" "^7.18.8" + "@babel/helper-validator-option" "^7.18.6" browserslist "^4.20.2" semver "^6.3.0" -"@babel/helper-environment-visitor@^7.16.7", "@babel/helper-environment-visitor@^7.18.2": - version "7.18.2" - resolved "https://registry.yarnpkg.com/@babel/helper-environment-visitor/-/helper-environment-visitor-7.18.2.tgz#8a6d2dedb53f6bf248e31b4baf38739ee4a637bd" - integrity sha512-14GQKWkX9oJzPiQQ7/J36FTXcD4kSp8egKjO9nINlSKiHITRA9q/R74qu8S9xlc/b/yjsJItQUeeh3xnGN0voQ== - -"@babel/helper-function-name@^7.17.9": - version "7.17.9" - resolved "https://registry.yarnpkg.com/@babel/helper-function-name/-/helper-function-name-7.17.9.tgz#136fcd54bc1da82fcb47565cf16fd8e444b1ff12" - integrity sha512-7cRisGlVtiVqZ0MW0/yFB4atgpGLWEHUVYnb448hZK4x+vih0YO5UoS11XIYtZYqHd0dIPMdUSv8q5K4LdMnIg== - dependencies: - "@babel/template" "^7.16.7" - "@babel/types" "^7.17.0" - -"@babel/helper-hoist-variables@^7.16.7": - version "7.16.7" - resolved "https://registry.yarnpkg.com/@babel/helper-hoist-variables/-/helper-hoist-variables-7.16.7.tgz#86bcb19a77a509c7b77d0e22323ef588fa58c246" - integrity sha512-m04d/0Op34H5v7pbZw6pSKP7weA6lsMvfiIAMeIvkY/R4xQtBSMFEigu9QTZ2qB/9l22vsxtM8a+Q8CzD255fg== - dependencies: - "@babel/types" "^7.16.7" - -"@babel/helper-module-imports@^7.16.7": - version "7.16.7" - resolved "https://registry.yarnpkg.com/@babel/helper-module-imports/-/helper-module-imports-7.16.7.tgz#25612a8091a999704461c8a222d0efec5d091437" - integrity sha512-LVtS6TqjJHFc+nYeITRo6VLXve70xmq7wPhWTqDJusJEgGmkAACWwMiTNrvfoQo6hEhFwAIixNkvB0jPXDL8Wg== - dependencies: - "@babel/types" "^7.16.7" - -"@babel/helper-module-transforms@^7.18.0": - version "7.18.0" - resolved "https://registry.yarnpkg.com/@babel/helper-module-transforms/-/helper-module-transforms-7.18.0.tgz#baf05dec7a5875fb9235bd34ca18bad4e21221cd" - integrity sha512-kclUYSUBIjlvnzN2++K9f2qzYKFgjmnmjwL4zlmU5f8ZtzgWe8s0rUPSTGy2HmK4P8T52MQsS+HTQAgZd3dMEA== - dependencies: - "@babel/helper-environment-visitor" "^7.16.7" - "@babel/helper-module-imports" "^7.16.7" - "@babel/helper-simple-access" "^7.17.7" - "@babel/helper-split-export-declaration" "^7.16.7" - "@babel/helper-validator-identifier" "^7.16.7" - "@babel/template" "^7.16.7" - "@babel/traverse" "^7.18.0" - "@babel/types" "^7.18.0" - -"@babel/helper-plugin-utils@^7.0.0", "@babel/helper-plugin-utils@^7.10.4", "@babel/helper-plugin-utils@^7.12.13", "@babel/helper-plugin-utils@^7.14.5", 
"@babel/helper-plugin-utils@^7.17.12", "@babel/helper-plugin-utils@^7.8.0": - version "7.17.12" - resolved "https://registry.yarnpkg.com/@babel/helper-plugin-utils/-/helper-plugin-utils-7.17.12.tgz#86c2347da5acbf5583ba0a10aed4c9bf9da9cf96" - integrity sha512-JDkf04mqtN3y4iAbO1hv9U2ARpPyPL1zqyWs/2WG1pgSq9llHFjStX5jdxb84himgJm+8Ng+x0oiWF/nw/XQKA== - -"@babel/helper-simple-access@^7.17.7": - version "7.18.2" - resolved "https://registry.yarnpkg.com/@babel/helper-simple-access/-/helper-simple-access-7.18.2.tgz#4dc473c2169ac3a1c9f4a51cfcd091d1c36fcff9" - integrity sha512-7LIrjYzndorDY88MycupkpQLKS1AFfsVRm2k/9PtKScSy5tZq0McZTj+DiMRynboZfIqOKvo03pmhTaUgiD6fQ== - dependencies: - "@babel/types" "^7.18.2" - -"@babel/helper-split-export-declaration@^7.16.7": - version "7.16.7" - resolved "https://registry.yarnpkg.com/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.16.7.tgz#0b648c0c42da9d3920d85ad585f2778620b8726b" - integrity sha512-xbWoy/PFoxSWazIToT9Sif+jJTlrMcndIsaOKvTA6u7QEo7ilkRZpjew18/W3c7nm8fXdUDXh02VXTbZ0pGDNw== - dependencies: - "@babel/types" "^7.16.7" - -"@babel/helper-validator-identifier@^7.15.7", "@babel/helper-validator-identifier@^7.16.7": - version "7.16.7" - resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.16.7.tgz#e8c602438c4a8195751243da9031d1607d247cad" - integrity sha512-hsEnFemeiW4D08A5gUAZxLBTXpZ39P+a+DGDsHw1yxqyQ/jzFEnxf5uTEGp+3bzAbNOxU1paTgYS4ECU/IgfDw== - -"@babel/helper-validator-option@^7.16.7": - version "7.16.7" - resolved "https://registry.yarnpkg.com/@babel/helper-validator-option/-/helper-validator-option-7.16.7.tgz#b203ce62ce5fe153899b617c08957de860de4d23" - integrity sha512-TRtenOuRUVo9oIQGPC5G9DgK4743cdxvtOw0weQNpZXaS16SCBi5MNjZF8vba3ETURjZpTbVn7Vvcf2eAwFozQ== - -"@babel/helpers@^7.18.2": - version "7.18.2" - resolved "https://registry.yarnpkg.com/@babel/helpers/-/helpers-7.18.2.tgz#970d74f0deadc3f5a938bfa250738eb4ac889384" - integrity sha512-j+d+u5xT5utcQSzrh9p+PaJX94h++KN+ng9b9WEJq7pkUPAd61FGqhjuUEdfknb3E/uDBb7ruwEeKkIxNJPIrg== - dependencies: - "@babel/template" "^7.16.7" - "@babel/traverse" "^7.18.2" - "@babel/types" "^7.18.2" - -"@babel/highlight@^7.16.7": - version "7.17.12" - resolved "https://registry.yarnpkg.com/@babel/highlight/-/highlight-7.17.12.tgz#257de56ee5afbd20451ac0a75686b6b404257351" - integrity sha512-7yykMVF3hfZY2jsHZEEgLc+3x4o1O+fYyULu11GynEUQNwB6lua+IIQn1FiJxNucd5UlyJryrwsOh8PL9Sn8Qg== - dependencies: - "@babel/helper-validator-identifier" "^7.16.7" +"@babel/helper-environment-visitor@^7.18.9": + version "7.18.9" + resolved "https://registry.yarnpkg.com/@babel/helper-environment-visitor/-/helper-environment-visitor-7.18.9.tgz#0c0cee9b35d2ca190478756865bb3528422f51be" + integrity sha512-3r/aACDJ3fhQ/EVgFy0hpj8oHyHpQc+LPtJoY9SzTThAsStm4Ptegq92vqKoE3vD706ZVFWITnMnxucw+S9Ipg== + +"@babel/helper-function-name@^7.18.9": + version "7.18.9" + resolved "https://registry.yarnpkg.com/@babel/helper-function-name/-/helper-function-name-7.18.9.tgz#940e6084a55dee867d33b4e487da2676365e86b0" + integrity sha512-fJgWlZt7nxGksJS9a0XdSaI4XvpExnNIgRP+rVefWh5U7BL8pPuir6SJUmFKRfjWQ51OtWSzwOxhaH/EBWWc0A== + dependencies: + "@babel/template" "^7.18.6" + "@babel/types" "^7.18.9" + +"@babel/helper-hoist-variables@^7.18.6": + version "7.18.6" + resolved "https://registry.yarnpkg.com/@babel/helper-hoist-variables/-/helper-hoist-variables-7.18.6.tgz#d4d2c8fb4baeaa5c68b99cc8245c56554f926678" + integrity 
sha512-UlJQPkFqFULIcyW5sbzgbkxn2FKRgwWiRexcuaR8RNJRy8+LLveqPjwZV/bwrLZCN0eUHD/x8D0heK1ozuoo6Q== + dependencies: + "@babel/types" "^7.18.6" + +"@babel/helper-module-imports@^7.18.6": + version "7.18.6" + resolved "https://registry.yarnpkg.com/@babel/helper-module-imports/-/helper-module-imports-7.18.6.tgz#1e3ebdbbd08aad1437b428c50204db13c5a3ca6e" + integrity sha512-0NFvs3VkuSYbFi1x2Vd6tKrywq+z/cLeYC/RJNFrIX/30Bf5aiGYbtvGXolEktzJH8o5E5KJ3tT+nkxuuZFVlA== + dependencies: + "@babel/types" "^7.18.6" + +"@babel/helper-module-transforms@^7.18.9": + version "7.18.9" + resolved "https://registry.yarnpkg.com/@babel/helper-module-transforms/-/helper-module-transforms-7.18.9.tgz#5a1079c005135ed627442df31a42887e80fcb712" + integrity sha512-KYNqY0ICwfv19b31XzvmI/mfcylOzbLtowkw+mfvGPAQ3kfCnMLYbED3YecL5tPd8nAYFQFAd6JHp2LxZk/J1g== + dependencies: + "@babel/helper-environment-visitor" "^7.18.9" + "@babel/helper-module-imports" "^7.18.6" + "@babel/helper-simple-access" "^7.18.6" + "@babel/helper-split-export-declaration" "^7.18.6" + "@babel/helper-validator-identifier" "^7.18.6" + "@babel/template" "^7.18.6" + "@babel/traverse" "^7.18.9" + "@babel/types" "^7.18.9" + +"@babel/helper-plugin-utils@^7.0.0", "@babel/helper-plugin-utils@^7.10.4", "@babel/helper-plugin-utils@^7.12.13", "@babel/helper-plugin-utils@^7.14.5", "@babel/helper-plugin-utils@^7.18.6", "@babel/helper-plugin-utils@^7.8.0": + version "7.18.9" + resolved "https://registry.yarnpkg.com/@babel/helper-plugin-utils/-/helper-plugin-utils-7.18.9.tgz#4b8aea3b069d8cb8a72cdfe28ddf5ceca695ef2f" + integrity sha512-aBXPT3bmtLryXaoJLyYPXPlSD4p1ld9aYeR+sJNOZjJJGiOpb+fKfh3NkcCu7J54nUJwCERPBExCCpyCOHnu/w== + +"@babel/helper-simple-access@^7.18.6": + version "7.18.6" + resolved "https://registry.yarnpkg.com/@babel/helper-simple-access/-/helper-simple-access-7.18.6.tgz#d6d8f51f4ac2978068df934b569f08f29788c7ea" + integrity sha512-iNpIgTgyAvDQpDj76POqg+YEt8fPxx3yaNBg3S30dxNKm2SWfYhD0TGrK/Eu9wHpUW63VQU894TsTg+GLbUa1g== + dependencies: + "@babel/types" "^7.18.6" + +"@babel/helper-split-export-declaration@^7.18.6": + version "7.18.6" + resolved "https://registry.yarnpkg.com/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.18.6.tgz#7367949bc75b20c6d5a5d4a97bba2824ae8ef075" + integrity sha512-bde1etTx6ZyTmobl9LLMMQsaizFVZrquTEHOqKeQESMKo4PlObf+8+JA25ZsIpZhT/WEd39+vOdLXAFG/nELpA== + dependencies: + "@babel/types" "^7.18.6" + +"@babel/helper-string-parser@^7.18.10": + version "7.18.10" + resolved "https://registry.yarnpkg.com/@babel/helper-string-parser/-/helper-string-parser-7.18.10.tgz#181f22d28ebe1b3857fa575f5c290b1aaf659b56" + integrity sha512-XtIfWmeNY3i4t7t4D2t02q50HvqHybPqW2ki1kosnvWCwuCMeo81Jf0gwr85jy/neUdg5XDdeFE/80DXiO+njw== + +"@babel/helper-validator-identifier@^7.18.6": + version "7.18.6" + resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.18.6.tgz#9c97e30d31b2b8c72a1d08984f2ca9b574d7a076" + integrity sha512-MmetCkz9ej86nJQV+sFCxoGGrUbU3q02kgLciwkrt9QqEB7cP39oKEY0PakknEO0Gu20SskMRi+AYZ3b1TpN9g== + +"@babel/helper-validator-option@^7.18.6": + version "7.18.6" + resolved "https://registry.yarnpkg.com/@babel/helper-validator-option/-/helper-validator-option-7.18.6.tgz#bf0d2b5a509b1f336099e4ff36e1a63aa5db4db8" + integrity sha512-XO7gESt5ouv/LRJdrVjkShckw6STTaB7l9BrpBaAHDeF5YZT+01PCwmR0SJHnkW6i8OwW/EVWRShfi4j2x+KQw== + +"@babel/helpers@^7.18.9": + version "7.18.9" + resolved "https://registry.yarnpkg.com/@babel/helpers/-/helpers-7.18.9.tgz#4bef3b893f253a1eced04516824ede94dcfe7ff9" + 
integrity sha512-Jf5a+rbrLoR4eNdUmnFu8cN5eNJT6qdTdOg5IHIzq87WwyRw9PwguLFOWYgktN/60IP4fgDUawJvs7PjQIzELQ== + dependencies: + "@babel/template" "^7.18.6" + "@babel/traverse" "^7.18.9" + "@babel/types" "^7.18.9" + +"@babel/highlight@^7.18.6": + version "7.18.6" + resolved "https://registry.yarnpkg.com/@babel/highlight/-/highlight-7.18.6.tgz#81158601e93e2563795adcbfbdf5d64be3f2ecdf" + integrity sha512-u7stbOuYjaPezCuLj29hNW1v64M2Md2qupEKP1fHc7WdOA3DgLh37suiSrZYY7haUB7iBeQZ9P1uiRF359do3g== + dependencies: + "@babel/helper-validator-identifier" "^7.18.6" chalk "^2.0.0" js-tokens "^4.0.0" -"@babel/parser@^7.1.0", "@babel/parser@^7.14.7", "@babel/parser@^7.16.7", "@babel/parser@^7.18.0": - version "7.18.4" - resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.18.4.tgz#6774231779dd700e0af29f6ad8d479582d7ce5ef" - integrity sha512-FDge0dFazETFcxGw/EXzOkN8uJp0PC7Qbm+Pe9T+av2zlBpOgunFHkQPPn+eRuClU73JF+98D531UgayY89tow== +"@babel/parser@^7.1.0", "@babel/parser@^7.14.7", "@babel/parser@^7.18.10", "@babel/parser@^7.18.11": + version "7.18.11" + resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.18.11.tgz#68bb07ab3d380affa9a3f96728df07969645d2d9" + integrity sha512-9JKn5vN+hDt0Hdqn1PiJ2guflwP+B6Ga8qbDuoF0PzzVhrzsKIJo8yGqVk6CmMHiMei9w1C1Bp9IMJSIK+HPIQ== "@babel/plugin-syntax-async-generators@^7.8.4": version "7.8.4" @@ -274,43 +279,44 @@ "@babel/helper-plugin-utils" "^7.14.5" "@babel/plugin-syntax-typescript@^7.7.2": - version "7.17.12" - resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-typescript/-/plugin-syntax-typescript-7.17.12.tgz#b54fc3be6de734a56b87508f99d6428b5b605a7b" - integrity sha512-TYY0SXFiO31YXtNg3HtFwNJHjLsAyIIhAhNWkQ5whPPS7HWUFlg9z0Ta4qAQNjQbP1wsSt/oKkmZ/4/WWdMUpw== - dependencies: - "@babel/helper-plugin-utils" "^7.17.12" - -"@babel/template@^7.16.7", "@babel/template@^7.3.3": - version "7.16.7" - resolved "https://registry.yarnpkg.com/@babel/template/-/template-7.16.7.tgz#8d126c8701fde4d66b264b3eba3d96f07666d155" - integrity sha512-I8j/x8kHUrbYRTUxXrrMbfCa7jxkE7tZre39x3kjr9hvI82cK1FfqLygotcWN5kdPGWcLdWMHpSBavse5tWw3w== - dependencies: - "@babel/code-frame" "^7.16.7" - "@babel/parser" "^7.16.7" - "@babel/types" "^7.16.7" - -"@babel/traverse@^7.18.0", "@babel/traverse@^7.18.2", "@babel/traverse@^7.7.2": - version "7.18.2" - resolved "https://registry.yarnpkg.com/@babel/traverse/-/traverse-7.18.2.tgz#b77a52604b5cc836a9e1e08dca01cba67a12d2e8" - integrity sha512-9eNwoeovJ6KH9zcCNnENY7DMFwTU9JdGCFtqNLfUAqtUHRCOsTOqWoffosP8vKmNYeSBUv3yVJXjfd8ucwOjUA== - dependencies: - "@babel/code-frame" "^7.16.7" - "@babel/generator" "^7.18.2" - "@babel/helper-environment-visitor" "^7.18.2" - "@babel/helper-function-name" "^7.17.9" - "@babel/helper-hoist-variables" "^7.16.7" - "@babel/helper-split-export-declaration" "^7.16.7" - "@babel/parser" "^7.18.0" - "@babel/types" "^7.18.2" + version "7.18.6" + resolved "https://registry.yarnpkg.com/@babel/plugin-syntax-typescript/-/plugin-syntax-typescript-7.18.6.tgz#1c09cd25795c7c2b8a4ba9ae49394576d4133285" + integrity sha512-mAWAuq4rvOepWCBid55JuRNvpTNf2UGVgoz4JV0fXEKolsVZDzsa4NqCef758WZJj/GDu0gVGItjKFiClTAmZA== + dependencies: + "@babel/helper-plugin-utils" "^7.18.6" + +"@babel/template@^7.18.10", "@babel/template@^7.18.6", "@babel/template@^7.3.3": + version "7.18.10" + resolved "https://registry.yarnpkg.com/@babel/template/-/template-7.18.10.tgz#6f9134835970d1dbf0835c0d100c9f38de0c5e71" + integrity sha512-TI+rCtooWHr3QJ27kJxfjutghu44DLnasDMwpDqCXVTal9RLp3RSYNh4NdBrRP2cQAoG9A8juOQl6P6oZG4JxA== + dependencies: + 
"@babel/code-frame" "^7.18.6" + "@babel/parser" "^7.18.10" + "@babel/types" "^7.18.10" + +"@babel/traverse@^7.18.10", "@babel/traverse@^7.18.9", "@babel/traverse@^7.7.2": + version "7.18.11" + resolved "https://registry.yarnpkg.com/@babel/traverse/-/traverse-7.18.11.tgz#3d51f2afbd83ecf9912bcbb5c4d94e3d2ddaa16f" + integrity sha512-TG9PiM2R/cWCAy6BPJKeHzNbu4lPzOSZpeMfeNErskGpTJx6trEvFaVCbDvpcxwy49BKWmEPwiW8mrysNiDvIQ== + dependencies: + "@babel/code-frame" "^7.18.6" + "@babel/generator" "^7.18.10" + "@babel/helper-environment-visitor" "^7.18.9" + "@babel/helper-function-name" "^7.18.9" + "@babel/helper-hoist-variables" "^7.18.6" + "@babel/helper-split-export-declaration" "^7.18.6" + "@babel/parser" "^7.18.11" + "@babel/types" "^7.18.10" debug "^4.1.0" globals "^11.1.0" -"@babel/types@^7.0.0", "@babel/types@^7.16.7", "@babel/types@^7.17.0", "@babel/types@^7.18.0", "@babel/types@^7.18.2", "@babel/types@^7.3.0", "@babel/types@^7.3.3": - version "7.18.4" - resolved "https://registry.yarnpkg.com/@babel/types/-/types-7.18.4.tgz#27eae9b9fd18e9dccc3f9d6ad051336f307be354" - integrity sha512-ThN1mBcMq5pG/Vm2IcBmPPfyPXbd8S02rS+OBIDENdufvqC7Z/jHPCv9IcP01277aKtDI8g/2XysBN4hA8niiw== +"@babel/types@^7.0.0", "@babel/types@^7.18.10", "@babel/types@^7.18.6", "@babel/types@^7.18.9", "@babel/types@^7.3.0", "@babel/types@^7.3.3": + version "7.18.10" + resolved "https://registry.yarnpkg.com/@babel/types/-/types-7.18.10.tgz#4908e81b6b339ca7c6b7a555a5fc29446f26dde6" + integrity sha512-MJvnbEiiNkpjo+LknnmRrqbY1GPUUggjv+wQVjetM/AONoupqRALB7I6jGqNUAZsKcRIEu2J6FRFvsczljjsaQ== dependencies: - "@babel/helper-validator-identifier" "^7.16.7" + "@babel/helper-string-parser" "^7.18.10" + "@babel/helper-validator-identifier" "^7.18.6" to-fast-properties "^2.0.0" "@bcoe/v8-coverage@^0.2.3": @@ -325,6 +331,11 @@ dependencies: "@jridgewell/trace-mapping" "0.3.9" +"@esbuild/linux-loong64@0.14.53": + version "0.14.53" + resolved "https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.14.53.tgz#251b4cd6760fadb4d68a05815e6dc5e432d69cd6" + integrity sha512-W2dAL6Bnyn4xa/QRSU3ilIK4EzD5wgYXKXJiS1HDF5vU3675qc2bvFyLwbUcdmssDveyndy7FbitrCoiV/eMLg== + "@eslint/eslintrc@^1.3.0": version "1.3.0" resolved "https://registry.yarnpkg.com/@eslint/eslintrc/-/eslintrc-1.3.0.tgz#29f92c30bb3e771e4a2048c95fa6855392dfac4f" @@ -340,7 +351,7 @@ minimatch "^3.1.2" strip-json-comments "^3.1.1" -"@gar/promisify@^1.0.1", "@gar/promisify@^1.1.3": +"@gar/promisify@^1.1.3": version "1.1.3" resolved "https://registry.yarnpkg.com/@gar/promisify/-/promisify-1.1.3.tgz#555193ab2e3bb3b6adc3d551c9c030d9e860daf6" integrity sha512-k2Ty1JcVojjJFwrg/ThKi2ujJ7XNLYaFGNB/bWT9wGR+oSMJHMa5w+CUq6p/pVrKeNNgA7pCqEcjSnHVoqJQFw== @@ -364,15 +375,20 @@ normalize-path "^2.0.1" through2 "^2.0.3" -"@humanwhocodes/config-array@^0.9.2": - version "0.9.5" - resolved "https://registry.yarnpkg.com/@humanwhocodes/config-array/-/config-array-0.9.5.tgz#2cbaf9a89460da24b5ca6531b8bbfc23e1df50c7" - integrity sha512-ObyMyWxZiCu/yTisA7uzx81s40xR2fD5Cg/2Kq7G02ajkNubJf6BopgDTmDyc3U7sXpNKM8cYOw7s7Tyr+DnCw== +"@humanwhocodes/config-array@^0.10.4": + version "0.10.4" + resolved "https://registry.yarnpkg.com/@humanwhocodes/config-array/-/config-array-0.10.4.tgz#01e7366e57d2ad104feea63e72248f22015c520c" + integrity sha512-mXAIHxZT3Vcpg83opl1wGlVZ9xydbfZO3r5YfRSH6Gpp2J/PfdBP0wbDa2sO6/qRbcalpoevVyW6A/fI6LfeMw== dependencies: "@humanwhocodes/object-schema" "^1.2.1" debug "^4.1.1" minimatch "^3.0.4" +"@humanwhocodes/gitignore-to-minimatch@^1.0.2": + version "1.0.2" + resolved 
"https://registry.yarnpkg.com/@humanwhocodes/gitignore-to-minimatch/-/gitignore-to-minimatch-1.0.2.tgz#316b0a63b91c10e53f242efb4ace5c3b34e8728d" + integrity sha512-rSqmMJDdLFUsyxR6FMtD00nfQKKLFb1kv+qBbOVKqErvloEIJLo5bDTJTQNTYgeyp78JsA7u/NPi5jT1GR/MuA== + "@humanwhocodes/object-schema@^1.2.1": version "1.2.1" resolved "https://registry.yarnpkg.com/@humanwhocodes/object-schema/-/object-schema-1.2.1.tgz#b520529ec21d8e5945a1851dfd1c32e94e39ff45" @@ -404,110 +420,110 @@ resolved "https://registry.yarnpkg.com/@istanbuljs/schema/-/schema-0.1.3.tgz#e45e384e4b8ec16bce2fd903af78450f6bf7ec98" integrity sha512-ZXRY4jNvVgSVQ8DL3LTcakaAtXwTVUxE81hslsyD2AtoXW/wVob10HkOJ1X/pAlcI7D+2YoZKg5do8G/w6RYgA== -"@jest/console@^28.1.0": - version "28.1.0" - resolved "https://registry.yarnpkg.com/@jest/console/-/console-28.1.0.tgz#db78222c3d3b0c1db82f1b9de51094c2aaff2176" - integrity sha512-tscn3dlJFGay47kb4qVruQg/XWlmvU0xp3EJOjzzY+sBaI+YgwKcvAmTcyYU7xEiLLIY5HCdWRooAL8dqkFlDA== +"@jest/console@^28.1.3": + version "28.1.3" + resolved "https://registry.yarnpkg.com/@jest/console/-/console-28.1.3.tgz#2030606ec03a18c31803b8a36382762e447655df" + integrity sha512-QPAkP5EwKdK/bxIr6C1I4Vs0rm2nHiANzj/Z5X2JQkrZo6IqvC4ldZ9K95tF0HdidhA8Bo6egxSzUFPYKcEXLw== dependencies: - "@jest/types" "^28.1.0" + "@jest/types" "^28.1.3" "@types/node" "*" chalk "^4.0.0" - jest-message-util "^28.1.0" - jest-util "^28.1.0" + jest-message-util "^28.1.3" + jest-util "^28.1.3" slash "^3.0.0" -"@jest/core@^28.1.0": - version "28.1.0" - resolved "https://registry.yarnpkg.com/@jest/core/-/core-28.1.0.tgz#784a1e6ce5358b46fcbdcfbbd93b1b713ed4ea80" - integrity sha512-/2PTt0ywhjZ4NwNO4bUqD9IVJfmFVhVKGlhvSpmEfUCuxYf/3NHcKmRFI+I71lYzbTT3wMuYpETDCTHo81gC/g== +"@jest/core@^28.1.3": + version "28.1.3" + resolved "https://registry.yarnpkg.com/@jest/core/-/core-28.1.3.tgz#0ebf2bd39840f1233cd5f2d1e6fc8b71bd5a1ac7" + integrity sha512-CIKBrlaKOzA7YG19BEqCw3SLIsEwjZkeJzf5bdooVnW4bH5cktqe3JX+G2YV1aK5vP8N9na1IGWFzYaTp6k6NA== dependencies: - "@jest/console" "^28.1.0" - "@jest/reporters" "^28.1.0" - "@jest/test-result" "^28.1.0" - "@jest/transform" "^28.1.0" - "@jest/types" "^28.1.0" + "@jest/console" "^28.1.3" + "@jest/reporters" "^28.1.3" + "@jest/test-result" "^28.1.3" + "@jest/transform" "^28.1.3" + "@jest/types" "^28.1.3" "@types/node" "*" ansi-escapes "^4.2.1" chalk "^4.0.0" ci-info "^3.2.0" exit "^0.1.2" graceful-fs "^4.2.9" - jest-changed-files "^28.0.2" - jest-config "^28.1.0" - jest-haste-map "^28.1.0" - jest-message-util "^28.1.0" + jest-changed-files "^28.1.3" + jest-config "^28.1.3" + jest-haste-map "^28.1.3" + jest-message-util "^28.1.3" jest-regex-util "^28.0.2" - jest-resolve "^28.1.0" - jest-resolve-dependencies "^28.1.0" - jest-runner "^28.1.0" - jest-runtime "^28.1.0" - jest-snapshot "^28.1.0" - jest-util "^28.1.0" - jest-validate "^28.1.0" - jest-watcher "^28.1.0" + jest-resolve "^28.1.3" + jest-resolve-dependencies "^28.1.3" + jest-runner "^28.1.3" + jest-runtime "^28.1.3" + jest-snapshot "^28.1.3" + jest-util "^28.1.3" + jest-validate "^28.1.3" + jest-watcher "^28.1.3" micromatch "^4.0.4" - pretty-format "^28.1.0" + pretty-format "^28.1.3" rimraf "^3.0.0" slash "^3.0.0" strip-ansi "^6.0.0" -"@jest/environment@^28.1.0": - version "28.1.0" - resolved "https://registry.yarnpkg.com/@jest/environment/-/environment-28.1.0.tgz#dedf7d59ec341b9292fcf459fd0ed819eb2e228a" - integrity sha512-S44WGSxkRngzHslhV6RoAExekfF7Qhwa6R5+IYFa81mpcj0YgdBnRSmvHe3SNwOt64yXaE5GG8Y2xM28ii5ssA== +"@jest/environment@^28.1.3": + version "28.1.3" + resolved 
"https://registry.yarnpkg.com/@jest/environment/-/environment-28.1.3.tgz#abed43a6b040a4c24fdcb69eab1f97589b2d663e" + integrity sha512-1bf40cMFTEkKyEf585R9Iz1WayDjHoHqvts0XFYEqyKM3cFWDpeMoqKKTAF9LSYQModPUlh8FKptoM2YcMWAXA== dependencies: - "@jest/fake-timers" "^28.1.0" - "@jest/types" "^28.1.0" + "@jest/fake-timers" "^28.1.3" + "@jest/types" "^28.1.3" "@types/node" "*" - jest-mock "^28.1.0" + jest-mock "^28.1.3" -"@jest/expect-utils@^28.1.0": - version "28.1.0" - resolved "https://registry.yarnpkg.com/@jest/expect-utils/-/expect-utils-28.1.0.tgz#a5cde811195515a9809b96748ae8bcc331a3538a" - integrity sha512-5BrG48dpC0sB80wpeIX5FU6kolDJI4K0n5BM9a5V38MGx0pyRvUBSS0u2aNTdDzmOrCjhOg8pGs6a20ivYkdmw== +"@jest/expect-utils@^28.1.3": + version "28.1.3" + resolved "https://registry.yarnpkg.com/@jest/expect-utils/-/expect-utils-28.1.3.tgz#58561ce5db7cd253a7edddbc051fb39dda50f525" + integrity sha512-wvbi9LUrHJLn3NlDW6wF2hvIMtd4JUl2QNVrjq+IBSHirgfrR3o9RnVtxzdEGO2n9JyIWwHnLfby5KzqBGg2YA== dependencies: jest-get-type "^28.0.2" -"@jest/expect@^28.1.0": - version "28.1.0" - resolved "https://registry.yarnpkg.com/@jest/expect/-/expect-28.1.0.tgz#2e5a31db692597070932366a1602b5157f0f217c" - integrity sha512-be9ETznPLaHOmeJqzYNIXv1ADEzENuQonIoobzThOYPuK/6GhrWNIJDVTgBLCrz3Am73PyEU2urQClZp0hLTtA== +"@jest/expect@^28.1.3": + version "28.1.3" + resolved "https://registry.yarnpkg.com/@jest/expect/-/expect-28.1.3.tgz#9ac57e1d4491baca550f6bdbd232487177ad6a72" + integrity sha512-lzc8CpUbSoE4dqT0U+g1qODQjBRHPpCPXissXD4mS9+sWQdmmpeJ9zSH1rS1HEkrsMN0fb7nKrJ9giAR1d3wBw== dependencies: - expect "^28.1.0" - jest-snapshot "^28.1.0" + expect "^28.1.3" + jest-snapshot "^28.1.3" -"@jest/fake-timers@^28.1.0": - version "28.1.0" - resolved "https://registry.yarnpkg.com/@jest/fake-timers/-/fake-timers-28.1.0.tgz#ea77878aabd5c5d50e1fc53e76d3226101e33064" - integrity sha512-Xqsf/6VLeAAq78+GNPzI7FZQRf5cCHj1qgQxCjws9n8rKw8r1UYoeaALwBvyuzOkpU3c1I6emeMySPa96rxtIg== +"@jest/fake-timers@^28.1.3": + version "28.1.3" + resolved "https://registry.yarnpkg.com/@jest/fake-timers/-/fake-timers-28.1.3.tgz#230255b3ad0a3d4978f1d06f70685baea91c640e" + integrity sha512-D/wOkL2POHv52h+ok5Oj/1gOG9HSywdoPtFsRCUmlCILXNn5eIWmcnd3DIiWlJnpGvQtmajqBP95Ei0EimxfLw== dependencies: - "@jest/types" "^28.1.0" - "@sinonjs/fake-timers" "^9.1.1" + "@jest/types" "^28.1.3" + "@sinonjs/fake-timers" "^9.1.2" "@types/node" "*" - jest-message-util "^28.1.0" - jest-mock "^28.1.0" - jest-util "^28.1.0" + jest-message-util "^28.1.3" + jest-mock "^28.1.3" + jest-util "^28.1.3" -"@jest/globals@^28.1.0": - version "28.1.0" - resolved "https://registry.yarnpkg.com/@jest/globals/-/globals-28.1.0.tgz#a4427d2eb11763002ff58e24de56b84ba79eb793" - integrity sha512-3m7sTg52OTQR6dPhsEQSxAvU+LOBbMivZBwOvKEZ+Rb+GyxVnXi9HKgOTYkx/S99T8yvh17U4tNNJPIEQmtwYw== +"@jest/globals@^28.1.3": + version "28.1.3" + resolved "https://registry.yarnpkg.com/@jest/globals/-/globals-28.1.3.tgz#a601d78ddc5fdef542728309894895b4a42dc333" + integrity sha512-XFU4P4phyryCXu1pbcqMO0GSQcYe1IsalYCDzRNyhetyeyxMcIxa11qPNDpVNLeretItNqEmYYQn1UYz/5x1NA== dependencies: - "@jest/environment" "^28.1.0" - "@jest/expect" "^28.1.0" - "@jest/types" "^28.1.0" + "@jest/environment" "^28.1.3" + "@jest/expect" "^28.1.3" + "@jest/types" "^28.1.3" -"@jest/reporters@^28.1.0": - version "28.1.0" - resolved "https://registry.yarnpkg.com/@jest/reporters/-/reporters-28.1.0.tgz#5183a28b9b593b6000fa9b89b031c7216b58a9a0" - integrity sha512-qxbFfqap/5QlSpIizH9c/bFCDKsQlM4uAKSOvZrP+nIdrjqre3FmKzpTtYyhsaVcOSNK7TTt2kjm+4BJIjysFA== 
+"@jest/reporters@^28.1.3": + version "28.1.3" + resolved "https://registry.yarnpkg.com/@jest/reporters/-/reporters-28.1.3.tgz#9adf6d265edafc5fc4a434cfb31e2df5a67a369a" + integrity sha512-JuAy7wkxQZVNU/V6g9xKzCGC5LVXx9FDcABKsSXp5MiKPEE2144a/vXTEDoyzjUpZKfVwp08Wqg5A4WfTMAzjg== dependencies: "@bcoe/v8-coverage" "^0.2.3" - "@jest/console" "^28.1.0" - "@jest/test-result" "^28.1.0" - "@jest/transform" "^28.1.0" - "@jest/types" "^28.1.0" - "@jridgewell/trace-mapping" "^0.3.7" + "@jest/console" "^28.1.3" + "@jest/test-result" "^28.1.3" + "@jest/transform" "^28.1.3" + "@jest/types" "^28.1.3" + "@jridgewell/trace-mapping" "^0.3.13" "@types/node" "*" chalk "^4.0.0" collect-v8-coverage "^1.0.0" @@ -519,66 +535,67 @@ istanbul-lib-report "^3.0.0" istanbul-lib-source-maps "^4.0.0" istanbul-reports "^3.1.3" - jest-util "^28.1.0" - jest-worker "^28.1.0" + jest-message-util "^28.1.3" + jest-util "^28.1.3" + jest-worker "^28.1.3" slash "^3.0.0" string-length "^4.0.1" strip-ansi "^6.0.0" terminal-link "^2.0.0" - v8-to-istanbul "^9.0.0" + v8-to-istanbul "^9.0.1" -"@jest/schemas@^28.0.2": - version "28.0.2" - resolved "https://registry.yarnpkg.com/@jest/schemas/-/schemas-28.0.2.tgz#08c30df6a8d07eafea0aef9fb222c5e26d72e613" - integrity sha512-YVDJZjd4izeTDkij00vHHAymNXQ6WWsdChFRK86qck6Jpr3DCL5W3Is3vslviRlP+bLuMYRLbdp98amMvqudhA== +"@jest/schemas@^28.1.3": + version "28.1.3" + resolved "https://registry.yarnpkg.com/@jest/schemas/-/schemas-28.1.3.tgz#ad8b86a66f11f33619e3d7e1dcddd7f2d40ff905" + integrity sha512-/l/VWsdt/aBXgjshLWOFyFt3IVdYypu5y2Wn2rOO1un6nkqIn8SLXzgIMYXFyYsRWDyF5EthmKJMIdJvk08grg== dependencies: - "@sinclair/typebox" "^0.23.3" + "@sinclair/typebox" "^0.24.1" -"@jest/source-map@^28.0.2": - version "28.0.2" - resolved "https://registry.yarnpkg.com/@jest/source-map/-/source-map-28.0.2.tgz#914546f4410b67b1d42c262a1da7e0406b52dc90" - integrity sha512-Y9dxC8ZpN3kImkk0LkK5XCEneYMAXlZ8m5bflmSL5vrwyeUpJfentacCUg6fOb8NOpOO7hz2+l37MV77T6BFPw== +"@jest/source-map@^28.1.2": + version "28.1.2" + resolved "https://registry.yarnpkg.com/@jest/source-map/-/source-map-28.1.2.tgz#7fe832b172b497d6663cdff6c13b0a920e139e24" + integrity sha512-cV8Lx3BeStJb8ipPHnqVw/IM2VCMWO3crWZzYodSIkxXnRcXJipCdx1JCK0K5MsJJouZQTH73mzf4vgxRaH9ww== dependencies: - "@jridgewell/trace-mapping" "^0.3.7" + "@jridgewell/trace-mapping" "^0.3.13" callsites "^3.0.0" graceful-fs "^4.2.9" -"@jest/test-result@^28.1.0": - version "28.1.0" - resolved "https://registry.yarnpkg.com/@jest/test-result/-/test-result-28.1.0.tgz#fd149dee123510dd2fcadbbf5f0020f98ad7f12c" - integrity sha512-sBBFIyoPzrZho3N+80P35A5oAkSKlGfsEFfXFWuPGBsW40UAjCkGakZhn4UQK4iQlW2vgCDMRDOob9FGKV8YoQ== +"@jest/test-result@^28.1.3": + version "28.1.3" + resolved "https://registry.yarnpkg.com/@jest/test-result/-/test-result-28.1.3.tgz#5eae945fd9f4b8fcfce74d239e6f725b6bf076c5" + integrity sha512-kZAkxnSE+FqE8YjW8gNuoVkkC9I7S1qmenl8sGcDOLropASP+BkcGKwhXoyqQuGOGeYY0y/ixjrd/iERpEXHNg== dependencies: - "@jest/console" "^28.1.0" - "@jest/types" "^28.1.0" + "@jest/console" "^28.1.3" + "@jest/types" "^28.1.3" "@types/istanbul-lib-coverage" "^2.0.0" collect-v8-coverage "^1.0.0" -"@jest/test-sequencer@^28.1.0": - version "28.1.0" - resolved "https://registry.yarnpkg.com/@jest/test-sequencer/-/test-sequencer-28.1.0.tgz#ce7294bbe986415b9a30e218c7e705e6ebf2cdf2" - integrity sha512-tZCEiVWlWNTs/2iK9yi6o3AlMfbbYgV4uuZInSVdzZ7ftpHZhCMuhvk2HLYhCZzLgPFQ9MnM1YaxMnh3TILFiQ== +"@jest/test-sequencer@^28.1.3": + version "28.1.3" + resolved 
"https://registry.yarnpkg.com/@jest/test-sequencer/-/test-sequencer-28.1.3.tgz#9d0c283d906ac599c74bde464bc0d7e6a82886c3" + integrity sha512-NIMPEqqa59MWnDi1kvXXpYbqsfQmSJsIbnd85mdVGkiDfQ9WQQTXOLsvISUfonmnBT+w85WEgneCigEEdHDFxw== dependencies: - "@jest/test-result" "^28.1.0" + "@jest/test-result" "^28.1.3" graceful-fs "^4.2.9" - jest-haste-map "^28.1.0" + jest-haste-map "^28.1.3" slash "^3.0.0" -"@jest/transform@^28.1.0": - version "28.1.0" - resolved "https://registry.yarnpkg.com/@jest/transform/-/transform-28.1.0.tgz#224a3c9ba4cc98e2ff996c0a89a2d59db15c74ce" - integrity sha512-omy2xe5WxlAfqmsTjTPxw+iXRTRnf+NtX0ToG+4S0tABeb4KsKmPUHq5UBuwunHg3tJRwgEQhEp0M/8oiatLEA== +"@jest/transform@^28.1.3": + version "28.1.3" + resolved "https://registry.yarnpkg.com/@jest/transform/-/transform-28.1.3.tgz#59d8098e50ab07950e0f2fc0fc7ec462371281b0" + integrity sha512-u5dT5di+oFI6hfcLOHGTAfmUxFRrjK+vnaP0kkVow9Md/M7V/MxqQMOz/VV25UZO8pzeA9PjfTpOu6BDuwSPQA== dependencies: "@babel/core" "^7.11.6" - "@jest/types" "^28.1.0" - "@jridgewell/trace-mapping" "^0.3.7" + "@jest/types" "^28.1.3" + "@jridgewell/trace-mapping" "^0.3.13" babel-plugin-istanbul "^6.1.1" chalk "^4.0.0" convert-source-map "^1.4.0" fast-json-stable-stringify "^2.0.0" graceful-fs "^4.2.9" - jest-haste-map "^28.1.0" + jest-haste-map "^28.1.3" jest-regex-util "^28.0.2" - jest-util "^28.1.0" + jest-util "^28.1.3" micromatch "^4.0.4" pirates "^4.0.4" slash "^3.0.0" @@ -595,12 +612,12 @@ "@types/yargs" "^15.0.0" chalk "^4.0.0" -"@jest/types@^28.1.0": - version "28.1.0" - resolved "https://registry.yarnpkg.com/@jest/types/-/types-28.1.0.tgz#508327a89976cbf9bd3e1cc74641a29fd7dfd519" - integrity sha512-xmEggMPr317MIOjjDoZ4ejCSr9Lpbt/u34+dvc99t7DS8YirW5rwZEhzKPC2BMUFkUhI48qs6qLUSGw5FuL0GA== +"@jest/types@^28.1.3": + version "28.1.3" + resolved "https://registry.yarnpkg.com/@jest/types/-/types-28.1.3.tgz#b05de80996ff12512bc5ceb1d208285a7d11748b" + integrity sha512-RyjiyMUZrKz/c+zlMFO1pm70DcIlST8AeWTkoUdZevew44wcNZQHsEVOiCVtgVnlFFD82FPaXycys58cf2muVQ== dependencies: - "@jest/schemas" "^28.0.2" + "@jest/schemas" "^28.1.3" "@types/istanbul-lib-coverage" "^2.0.0" "@types/istanbul-reports" "^3.0.0" "@types/node" "*" @@ -615,24 +632,24 @@ "@jridgewell/set-array" "^1.0.0" "@jridgewell/sourcemap-codec" "^1.4.10" -"@jridgewell/gen-mapping@^0.3.0": - version "0.3.1" - resolved "https://registry.yarnpkg.com/@jridgewell/gen-mapping/-/gen-mapping-0.3.1.tgz#cf92a983c83466b8c0ce9124fadeaf09f7c66ea9" - integrity sha512-GcHwniMlA2z+WFPWuY8lp3fsza0I8xPFMWL5+n8LYyP6PSvPrXf4+n8stDHZY2DM0zy9sVkRDy1jDI4XGzYVqg== +"@jridgewell/gen-mapping@^0.3.0", "@jridgewell/gen-mapping@^0.3.2": + version "0.3.2" + resolved "https://registry.yarnpkg.com/@jridgewell/gen-mapping/-/gen-mapping-0.3.2.tgz#c1aedc61e853f2bb9f5dfe6d4442d3b565b253b9" + integrity sha512-mh65xKQAzI6iBcFzwv28KVWSmCkdRBWoOh+bYQGW3+6OZvbbN3TqMGo5hqYxQniRcH9F2VZIoJCm4pa3BPDK/A== dependencies: - "@jridgewell/set-array" "^1.0.0" + "@jridgewell/set-array" "^1.0.1" "@jridgewell/sourcemap-codec" "^1.4.10" "@jridgewell/trace-mapping" "^0.3.9" "@jridgewell/resolve-uri@^3.0.3": - version "3.0.7" - resolved "https://registry.yarnpkg.com/@jridgewell/resolve-uri/-/resolve-uri-3.0.7.tgz#30cd49820a962aff48c8fffc5cd760151fca61fe" - integrity sha512-8cXDaBBHOr2pQ7j77Y6Vp5VDT2sIqWyWQ56TjEq4ih/a4iST3dItRe8Q9fp0rrIl9DoKhWQtUQz/YpOxLkXbNA== + version "3.1.0" + resolved "https://registry.yarnpkg.com/@jridgewell/resolve-uri/-/resolve-uri-3.1.0.tgz#2203b118c157721addfe69d47b70465463066d78" + integrity 
sha512-F2msla3tad+Mfht5cJq7LSXcdudKTWCVYUgw6pLFOOHSTtZlj6SWNYAp+AhuqLmWdBO2X5hPrLcu8cVP8fy28w== -"@jridgewell/set-array@^1.0.0": - version "1.1.1" - resolved "https://registry.yarnpkg.com/@jridgewell/set-array/-/set-array-1.1.1.tgz#36a6acc93987adcf0ba50c66908bd0b70de8afea" - integrity sha512-Ct5MqZkLGEXTVmQYbGtx9SVqD2fqwvdubdps5D3djjAkgkKwT918VNOz65pEHFaYTeWcukmJmH5SwsA9Tn2ObQ== +"@jridgewell/set-array@^1.0.0", "@jridgewell/set-array@^1.0.1": + version "1.1.2" + resolved "https://registry.yarnpkg.com/@jridgewell/set-array/-/set-array-1.1.2.tgz#7c6cf998d6d20b914c0a55a91ae928ff25965e72" + integrity sha512-xnkseuNADM0gt2bs+BvhO0p78Mk762YnZdsuzFV018NoG1Sj1SCQvpSqa7XUaTam5vAGasABV9qXASMKnFMwMw== "@jridgewell/source-map@^0.3.2": version "0.3.2" @@ -643,9 +660,9 @@ "@jridgewell/trace-mapping" "^0.3.9" "@jridgewell/sourcemap-codec@^1.4.10": - version "1.4.13" - resolved "https://registry.yarnpkg.com/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.13.tgz#b6461fb0c2964356c469e115f504c95ad97ab88c" - integrity sha512-GryiOJmNcWbovBxTfZSF71V/mXbgcV3MewDe3kIMCLyIh5e7SKAeUZs+rMnJ8jkMolZ/4/VsdBmMrw3l+VdZ3w== + version "1.4.14" + resolved "https://registry.yarnpkg.com/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.14.tgz#add4c98d341472a289190b424efbdb096991bb24" + integrity sha512-XPSJHWmi394fuUuzDnGz1wiKqWfo1yXecHQMRf2l6hztTO+nPru658AyDngaBe7isIxEkRsPR3FZh+s7iVa4Uw== "@jridgewell/trace-mapping@0.3.9": version "0.3.9" @@ -655,637 +672,638 @@ "@jridgewell/resolve-uri" "^3.0.3" "@jridgewell/sourcemap-codec" "^1.4.10" -"@jridgewell/trace-mapping@^0.3.7", "@jridgewell/trace-mapping@^0.3.9": - version "0.3.13" - resolved "https://registry.yarnpkg.com/@jridgewell/trace-mapping/-/trace-mapping-0.3.13.tgz#dcfe3e95f224c8fe97a87a5235defec999aa92ea" - integrity sha512-o1xbKhp9qnIAoHJSWd6KlCZfqslL4valSF81H8ImioOAxluWYWOpWkpyktY2vnt4tbrX9XYaxovq6cgowaJp2w== +"@jridgewell/trace-mapping@^0.3.12", "@jridgewell/trace-mapping@^0.3.13", "@jridgewell/trace-mapping@^0.3.7", "@jridgewell/trace-mapping@^0.3.9": + version "0.3.14" + resolved "https://registry.yarnpkg.com/@jridgewell/trace-mapping/-/trace-mapping-0.3.14.tgz#b231a081d8f66796e475ad588a1ef473112701ed" + integrity sha512-bJWEfQ9lPTvm3SneWwRFVLzrh6nhjwqw7TUFFBEMzwvg7t7PCDenf2lDwqo4NQXzdpgBXyFgDWnQA+2vkruksQ== dependencies: "@jridgewell/resolve-uri" "^3.0.3" "@jridgewell/sourcemap-codec" "^1.4.10" -"@lerna/add@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/add/-/add-5.0.0.tgz#0545e2eef157c142d82ba765467c27b36fe53ce8" - integrity sha512-KdIOQL+88iHU9zuAU8Be1AL4cOVmm77nlckylsNaVVTiomNipr/h7lStiBO52BoMkwKzNwOH6He5HGY0Yo7s2w== - dependencies: - "@lerna/bootstrap" "5.0.0" - "@lerna/command" "5.0.0" - "@lerna/filter-options" "5.0.0" - "@lerna/npm-conf" "5.0.0" - "@lerna/validation-error" "5.0.0" +"@lerna/add@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/add/-/add-5.3.0.tgz#2e6cd5ff3d8bad2b0b36cdeaa300fc39fbae215e" + integrity sha512-MxwTO2UBxZwwuquKbBqdYa56YTqg6Lfz1MZsRQxO7F2cb2NN8NEYTcGOli/71Ee/2AoX4R4xIFTh3TnaflQ25A== + dependencies: + "@lerna/bootstrap" "5.3.0" + "@lerna/command" "5.3.0" + "@lerna/filter-options" "5.3.0" + "@lerna/npm-conf" "5.3.0" + "@lerna/validation-error" "5.3.0" dedent "^0.7.0" - npm-package-arg "^8.1.0" + npm-package-arg "8.1.1" p-map "^4.0.0" - pacote "^13.4.1" + pacote "^13.6.1" semver "^7.3.4" -"@lerna/bootstrap@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/bootstrap/-/bootstrap-5.0.0.tgz#624b67a4631c7455b98cfed4dbb2e38b27025a7a" - integrity 
sha512-2m1BxKbYwDABy+uE/Da3EQM61R58bI3YQ0o1rsFQq1u0ltL9CJxw1o0lMg84hwMsBb4D+kLIXLqetYlLVgbr0Q== - dependencies: - "@lerna/command" "5.0.0" - "@lerna/filter-options" "5.0.0" - "@lerna/has-npm-version" "5.0.0" - "@lerna/npm-install" "5.0.0" - "@lerna/package-graph" "5.0.0" - "@lerna/pulse-till-done" "5.0.0" - "@lerna/rimraf-dir" "5.0.0" - "@lerna/run-lifecycle" "5.0.0" - "@lerna/run-topologically" "5.0.0" - "@lerna/symlink-binary" "5.0.0" - "@lerna/symlink-dependencies" "5.0.0" - "@lerna/validation-error" "5.0.0" - "@npmcli/arborist" "5.2.0" +"@lerna/bootstrap@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/bootstrap/-/bootstrap-5.3.0.tgz#3e0e06757ec139b4742f2bb9bc55c10fd8ddf8da" + integrity sha512-iHVjt6YOQKLY0j+ex13a6ZxjIQ1TSSXqbl6z1hVjBFaDyCh7pra/tgj0LohZDVCaouLwRKucceQfTGrb+cfo7A== + dependencies: + "@lerna/command" "5.3.0" + "@lerna/filter-options" "5.3.0" + "@lerna/has-npm-version" "5.3.0" + "@lerna/npm-install" "5.3.0" + "@lerna/package-graph" "5.3.0" + "@lerna/pulse-till-done" "5.3.0" + "@lerna/rimraf-dir" "5.3.0" + "@lerna/run-lifecycle" "5.3.0" + "@lerna/run-topologically" "5.3.0" + "@lerna/symlink-binary" "5.3.0" + "@lerna/symlink-dependencies" "5.3.0" + "@lerna/validation-error" "5.3.0" + "@npmcli/arborist" "5.3.0" dedent "^0.7.0" get-port "^5.1.1" multimatch "^5.0.0" - npm-package-arg "^8.1.0" - npmlog "^4.1.2" + npm-package-arg "8.1.1" + npmlog "^6.0.2" p-map "^4.0.0" p-map-series "^2.1.0" p-waterfall "^2.1.1" semver "^7.3.4" -"@lerna/changed@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/changed/-/changed-5.0.0.tgz#fb3cdd5f281683a461c3099cbcf0978e23b33140" - integrity sha512-A24MHipPGODmzQBH1uIMPPUUOc1Zm7Qe/eSYzm52bFHtVxWH0nIVXfunadoMX32NhzKQH3Sw8X2rWHPQSRoUvA== +"@lerna/changed@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/changed/-/changed-5.3.0.tgz#631dd147f2c86f292106fe6d891b0a2bcc5ad43b" + integrity sha512-i6ZfBDBZCpnPaSWTuNGTrnExkHNMC+/cSUuS9njaqe+tXgqE95Ja3cMxWZth9Q1uasjcEBHPU2jG0VKrU37rpA== dependencies: - "@lerna/collect-updates" "5.0.0" - "@lerna/command" "5.0.0" - "@lerna/listable" "5.0.0" - "@lerna/output" "5.0.0" + "@lerna/collect-updates" "5.3.0" + "@lerna/command" "5.3.0" + "@lerna/listable" "5.3.0" + "@lerna/output" "5.3.0" -"@lerna/check-working-tree@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/check-working-tree/-/check-working-tree-5.0.0.tgz#e7b653b78c3bb96db7a00f6a74018e2bb88ec088" - integrity sha512-PnUMdpT2qS4o+vs+7l5fFIizstGdqSkhLG+Z9ZiY5OMtnGd+pmAFQFlbLSZSmdvQSOSobl9fhB1St8qhPD60xQ== +"@lerna/check-working-tree@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/check-working-tree/-/check-working-tree-5.3.0.tgz#fd10158bcb62a840e343d1a4b12a0eedbc2e0146" + integrity sha512-qo6jUGWXKLVL1nU8aEECqwrGRjs9o1l1hXdD2juA4Fvzsam1cFVHJwsmw3hAXGhEPD0oalg/XR62H9rZSCLOvQ== dependencies: - "@lerna/collect-uncommitted" "5.0.0" - "@lerna/describe-ref" "5.0.0" - "@lerna/validation-error" "5.0.0" + "@lerna/collect-uncommitted" "5.3.0" + "@lerna/describe-ref" "5.3.0" + "@lerna/validation-error" "5.3.0" -"@lerna/child-process@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/child-process/-/child-process-5.0.0.tgz#1c7663d2910431f6c25543fd53998ae95b2dac19" - integrity sha512-cFVNkedrlU8XTt15EvUtQ84hqtV4oToQW/elKNv//mhCz06HY8Y+Ia6XevK2zrIhZjS6DT576F/7SmTk3vnpmg== +"@lerna/child-process@5.3.0": + version "5.3.0" + resolved 
"https://registry.yarnpkg.com/@lerna/child-process/-/child-process-5.3.0.tgz#ec27b96afbb02f4c0cd2cf09db41be5312182799" + integrity sha512-4uXPNIptrgQQQVHVVAXBD8F7IqSvZL3Og0G0DHiWKH+dsSyMIUtaIGJt7sifVoL7nzex4AqEiPq/AubpmG5g4Q== dependencies: chalk "^4.1.0" execa "^5.0.0" strong-log-transformer "^2.1.0" -"@lerna/clean@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/clean/-/clean-5.0.0.tgz#2b5cf202ab3eca18a075b292c55e6641d18b1b8f" - integrity sha512-7B+0Nx6MEPmCfnEa1JFyZwJsC7qlGrikWXyLglLb/wcbapYVsuDauOl9AT1iOFoXKw82P77HWYUKWeD9DQgw/w== - dependencies: - "@lerna/command" "5.0.0" - "@lerna/filter-options" "5.0.0" - "@lerna/prompt" "5.0.0" - "@lerna/pulse-till-done" "5.0.0" - "@lerna/rimraf-dir" "5.0.0" +"@lerna/clean@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/clean/-/clean-5.3.0.tgz#2a98de89c365c711040acbfaa96a52e3ca88af79" + integrity sha512-Jn+Dr7A69dch8m1dLe7l/SDVQVQT2j7zdy2gaZVEmJIgEEaXmEbfJ2t2n06vRXtckI9B85M5mubT1U3Y7KuNuA== + dependencies: + "@lerna/command" "5.3.0" + "@lerna/filter-options" "5.3.0" + "@lerna/prompt" "5.3.0" + "@lerna/pulse-till-done" "5.3.0" + "@lerna/rimraf-dir" "5.3.0" p-map "^4.0.0" p-map-series "^2.1.0" p-waterfall "^2.1.1" -"@lerna/cli@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/cli/-/cli-5.0.0.tgz#f440f6664aa6c22bb58e69aacfde655c831de2f9" - integrity sha512-g8Nifko8XNySOl8u2molSHVl+fk/E1e5FSn/W2ekeijmc3ezktp+xbPWofNq71N/d297+KPQpLBfwzXSo9ufIQ== +"@lerna/cli@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/cli/-/cli-5.3.0.tgz#b42808b747a6b3136028e5cdc775f72805112b95" + integrity sha512-P7F3Xs98pXMEGZX+mnFfsd6gU03x8UrwQ3mElvQBICl4Ew9z6rS8NGUd3JOPFzm4/vSTjYTnPyPdWBjj6/f6sw== dependencies: - "@lerna/global-options" "5.0.0" + "@lerna/global-options" "5.3.0" dedent "^0.7.0" - npmlog "^4.1.2" + npmlog "^6.0.2" yargs "^16.2.0" -"@lerna/collect-uncommitted@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/collect-uncommitted/-/collect-uncommitted-5.0.0.tgz#2843f98995c8bcc1d783d1d9739122c79378f3c5" - integrity sha512-mga/2S9rK0TP5UCulWiCTrC/uKaiIlOro1n8R3oCw6eRw9eupCSRx5zGI7pdh8CPD82MDL7w0a6OTep3WBSBVA== +"@lerna/collect-uncommitted@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/collect-uncommitted/-/collect-uncommitted-5.3.0.tgz#fa031bff12ca8c7c78f8fb4584bd6289ccbba40e" + integrity sha512-Ll/mU9Nes0NQoa0pSv2TR2PTCkIomBGuDWH48OF2sKKu69NuLjrD2L0udS5nJYig9HxFewtm4QTiUdYPxfJXkQ== dependencies: - "@lerna/child-process" "5.0.0" + "@lerna/child-process" "5.3.0" chalk "^4.1.0" - npmlog "^4.1.2" + npmlog "^6.0.2" -"@lerna/collect-updates@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/collect-updates/-/collect-updates-5.0.0.tgz#cce16b9e8136e1e7bc33fe0fb12b283e538fa658" - integrity sha512-X82i8SVgBXLCk8vbKWfQPRLTAXROCANL8Z/bU1l6n7yycsHKdjrrlNi1+KprFdfRsMvSm10R4qPNcl9jgsp/IA== +"@lerna/collect-updates@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/collect-updates/-/collect-updates-5.3.0.tgz#21ec4fa7f7e836937ebc9ec7ab4d2053ad9f7bd7" + integrity sha512-fzJo/rmdXKWKYt+9IXjtenIZtSr3blMH8GEqoVKpSZ7TJGpxcFNmMe6foa60BgaTnDmmg1y7Qu6JbQJ3Ra5c5w== dependencies: - "@lerna/child-process" "5.0.0" - "@lerna/describe-ref" "5.0.0" + "@lerna/child-process" "5.3.0" + "@lerna/describe-ref" "5.3.0" minimatch "^3.0.4" - npmlog "^4.1.2" + npmlog "^6.0.2" slash "^3.0.0" -"@lerna/command@5.0.0": - version "5.0.0" - resolved 
"https://registry.yarnpkg.com/@lerna/command/-/command-5.0.0.tgz#cdc9f32a6b1c7153fe7150d642d2a420a3d0797d" - integrity sha512-j7/apU5d/nhSc1qIZgcV03KyO5jz3y7cwSum3IuK8/XF6rKwt3FVnbue1V3l9sJ6IRJjsRGKyViB1IdP5nSX4Q== - dependencies: - "@lerna/child-process" "5.0.0" - "@lerna/package-graph" "5.0.0" - "@lerna/project" "5.0.0" - "@lerna/validation-error" "5.0.0" - "@lerna/write-log-file" "5.0.0" +"@lerna/command@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/command/-/command-5.3.0.tgz#0ef7a09ca5b03ff08f164500df560959893c6775" + integrity sha512-UNQQ4EGTumqLhOuDPcRA4LpdS9pcTYKSdh/8MdKPeyIRN70vCTwdeTrxqaaKsn3Jo7ycvyUQT5yfrUFmCClfoA== + dependencies: + "@lerna/child-process" "5.3.0" + "@lerna/package-graph" "5.3.0" + "@lerna/project" "5.3.0" + "@lerna/validation-error" "5.3.0" + "@lerna/write-log-file" "5.3.0" clone-deep "^4.0.1" dedent "^0.7.0" execa "^5.0.0" is-ci "^2.0.0" - npmlog "^4.1.2" + npmlog "^6.0.2" -"@lerna/conventional-commits@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/conventional-commits/-/conventional-commits-5.0.0.tgz#7f9c16fda074c9ed897cb695f5ae23678dd441eb" - integrity sha512-tUCRTAycDCtSlCEI0hublq4uKHeV0UHpwIb3Fdt6iv2AoTSPBSX/Dwu/6VqguysOSEkkR4M2JCOLvJCl4IMxwg== +"@lerna/conventional-commits@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/conventional-commits/-/conventional-commits-5.3.0.tgz#64d2035648186146d6c331fd6dcbf146813b3600" + integrity sha512-9uoQ2E1J7pL0fml5PNO7FydnBNeqrNOQa53Ca1Klf5t/x4vIn51ocOZNm/YbRAc/affnrxxp+gR2/SWlN0yKqQ== dependencies: - "@lerna/validation-error" "5.0.0" + "@lerna/validation-error" "5.3.0" conventional-changelog-angular "^5.0.12" - conventional-changelog-core "^4.2.2" + conventional-changelog-core "^4.2.4" conventional-recommended-bump "^6.1.0" fs-extra "^9.1.0" get-stream "^6.0.0" - lodash.template "^4.5.0" - npm-package-arg "^8.1.0" - npmlog "^4.1.2" + npm-package-arg "8.1.1" + npmlog "^6.0.2" pify "^5.0.0" semver "^7.3.4" -"@lerna/create-symlink@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/create-symlink/-/create-symlink-5.0.0.tgz#eccef7f89fdc4d7cd904694d9e2eb0b582073b5e" - integrity sha512-nHYNacrh15Y0yEofVlUVu9dhf4JjIn9hY7v7rOUXzUeQ91iXY5Q3PVHkBeRUigyT5CWP5qozZwraCMwp+lDWYg== +"@lerna/create-symlink@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/create-symlink/-/create-symlink-5.3.0.tgz#8398ca1c099606510505ad65601b15bc4c6f0000" + integrity sha512-xIoC9m4J/u4NV/8ms4P2fiimaYgialqJvNamvMDRmgE1c3BLDSGk2nE4nVI2W5LxjgJdMTiIH9v1QpTUC9Fv+Q== dependencies: - cmd-shim "^4.1.0" + cmd-shim "^5.0.0" fs-extra "^9.1.0" - npmlog "^4.1.2" + npmlog "^6.0.2" -"@lerna/create@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/create/-/create-5.0.0.tgz#4aac3d1f2c1f6d7fadde49d3663b318fcdd39b06" - integrity sha512-sdFTVTLOVuhHpzIYhFAwK0Ry3p4d7uMe9ZG/Ii128/pB9kEEfCth+1WBq6mBpYZ5mOLLgxJbWalbiJFl0toQRw== +"@lerna/create@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/create/-/create-5.3.0.tgz#af0bd2f1da91976a91b5b8ce621b921ea3d155d0" + integrity sha512-DotTReCc3+Q9rpMA8RKAGemUK7JXT7skbxHvpqpPj7ryNkIv/dNAFC2EHglcpt9Rmyo6YbSP2zk0gfDbdiIcVA== dependencies: - "@lerna/child-process" "5.0.0" - "@lerna/command" "5.0.0" - "@lerna/npm-conf" "5.0.0" - "@lerna/validation-error" "5.0.0" + "@lerna/child-process" "5.3.0" + "@lerna/command" "5.3.0" + "@lerna/npm-conf" "5.3.0" + "@lerna/validation-error" "5.3.0" dedent "^0.7.0" fs-extra "^9.1.0" globby "^11.0.2" - init-package-json "^2.0.2" - 
npm-package-arg "^8.1.0" + init-package-json "^3.0.2" + npm-package-arg "8.1.1" p-reduce "^2.1.0" - pacote "^13.4.1" + pacote "^13.6.1" pify "^5.0.0" semver "^7.3.4" slash "^3.0.0" validate-npm-package-license "^3.0.4" - validate-npm-package-name "^3.0.0" + validate-npm-package-name "^4.0.0" whatwg-url "^8.4.0" yargs-parser "20.2.4" -"@lerna/describe-ref@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/describe-ref/-/describe-ref-5.0.0.tgz#f0676843642e8880133783a9f059e6cb4c027fe1" - integrity sha512-iLvMHp3nl4wcMR3/lVkz0ng7pAHfLQ7yvz2HsYBq7wllCcEzpchzPgyVzyvbpJ+Ke/MKjQTsrHE/yOGOH67GVw== +"@lerna/describe-ref@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/describe-ref/-/describe-ref-5.3.0.tgz#5edd1d5ce314e6b51b8e2902f40dd0a7132c9daa" + integrity sha512-R+CtJcOuAF3kJ6GNQnGC3STEi+5OtpNVz2n17sAs/xqJnq79tPdzEhT+pMxB2eSEkQYlSr+cCKMpF0m/mtIPQA== dependencies: - "@lerna/child-process" "5.0.0" - npmlog "^4.1.2" + "@lerna/child-process" "5.3.0" + npmlog "^6.0.2" -"@lerna/diff@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/diff/-/diff-5.0.0.tgz#844333f5478fc4993c4389fee1e0cd8eff9114fe" - integrity sha512-S4XJ6i9oP77cSmJ3oRUJGMgrI+jOTmkYWur2nqgSdyJBE1J2eClgTJknb3WAHg2cHALT18WzFqNghFOGM+9dRA== +"@lerna/diff@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/diff/-/diff-5.3.0.tgz#51204c112d6154becd6ffcf9320ee415a95c58bd" + integrity sha512-i6f99dtO90u1QIJEfVtKE831m4gnMHBwY+4D84GY2SJMno8uI7ZyxMRZQh1nAFtvlNozO2MgzLr1OHtNMZOIgQ== dependencies: - "@lerna/child-process" "5.0.0" - "@lerna/command" "5.0.0" - "@lerna/validation-error" "5.0.0" - npmlog "^4.1.2" + "@lerna/child-process" "5.3.0" + "@lerna/command" "5.3.0" + "@lerna/validation-error" "5.3.0" + npmlog "^6.0.2" -"@lerna/exec@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/exec/-/exec-5.0.0.tgz#a59dd094e456ea46cfa8f713da0ea3334a7ec9ac" - integrity sha512-g5i+2RclCGWLsl88m11j99YM2Gqnwa2lxZ5tDeqqWZFno6Dlvop17Yl6/MFH42EgM2DQHUUCammvcLIAJ2XwEA== - dependencies: - "@lerna/child-process" "5.0.0" - "@lerna/command" "5.0.0" - "@lerna/filter-options" "5.0.0" - "@lerna/profiler" "5.0.0" - "@lerna/run-topologically" "5.0.0" - "@lerna/validation-error" "5.0.0" +"@lerna/exec@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/exec/-/exec-5.3.0.tgz#c680261e484c9b3072e3c56368523d3a8cab32f5" + integrity sha512-kI/IuF1hbT+pEMZc3v4+w8BLckUIi45ipzOP0bWvXNgSKKuADAU3HLv+ifRXEjob5906C+Zc7K2IVoVS6r1TDg== + dependencies: + "@lerna/child-process" "5.3.0" + "@lerna/command" "5.3.0" + "@lerna/filter-options" "5.3.0" + "@lerna/profiler" "5.3.0" + "@lerna/run-topologically" "5.3.0" + "@lerna/validation-error" "5.3.0" p-map "^4.0.0" -"@lerna/filter-options@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/filter-options/-/filter-options-5.0.0.tgz#1d2606e1d2ed106689b43cc5d41a77b239afb837" - integrity sha512-un73aYkXlzKlnDPx2AlqNW+ArCZ20XaX+Y6C0F+av9VZriiBsCgZTnflhih9fiSMnXjN5r9CA8YdWvZqa3oAcQ== +"@lerna/filter-options@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/filter-options/-/filter-options-5.3.0.tgz#08ba418787db5ee809aecebfa4e7a4461a6a5bbb" + integrity sha512-ddgy0oDisTKIhCJ4WY5CeEhTsyrbW+zeBvZ7rVaG0oQXjSSYBried4TXRvgy67fampfHoPX+eQq5l1SYTRFPlw== dependencies: - "@lerna/collect-updates" "5.0.0" - "@lerna/filter-packages" "5.0.0" + "@lerna/collect-updates" "5.3.0" + "@lerna/filter-packages" "5.3.0" dedent "^0.7.0" - npmlog "^4.1.2" + npmlog "^6.0.2" 
-"@lerna/filter-packages@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/filter-packages/-/filter-packages-5.0.0.tgz#9aae543ab5e45a1b0c3f7ad33e0686ceb8d92c88" - integrity sha512-+EIjVVaMPDZ05F/gZa+kcXjBOLXqEamcEIDr+2ZXRgJmnrLx9BBY1B7sBEFHg7JXbeOKS+fKtMGVveV0SzgH3Q== +"@lerna/filter-packages@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/filter-packages/-/filter-packages-5.3.0.tgz#3a5c73e01233921c50018d02809a9da9d82186db" + integrity sha512-5/2V50sQB2+JNwuCHP/UPm3y8PN2JWVY9CbNLtF3K5bymNsCkQh2KHEL9wlWZ4yfr/2ufpy4XFPaFUHNoUOGnQ== dependencies: - "@lerna/validation-error" "5.0.0" + "@lerna/validation-error" "5.3.0" multimatch "^5.0.0" - npmlog "^4.1.2" + npmlog "^6.0.2" -"@lerna/get-npm-exec-opts@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/get-npm-exec-opts/-/get-npm-exec-opts-5.0.0.tgz#25c1cd7d2b6c1fe903cd144d9f6e2d5cae47429b" - integrity sha512-ZOg3kc5FXYA1kVFD2hfJOl64hNASWD6panwD0HlyzXgfKKTDRm/P/qtAqS8WGCzQWgEdx4wvsDe/58Lzzh6QzQ== +"@lerna/get-npm-exec-opts@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/get-npm-exec-opts/-/get-npm-exec-opts-5.3.0.tgz#60d4fb6d1786b051d532a2c9dc91fcac722edcfb" + integrity sha512-cYBypDo8C7f4MvVvap2nYgtk8MXAADrYU1VdECSJ3Stbe4p2vBGt8bM9xkS2uPfQFMK3YSy3YPkSZcSjVXyoGw== dependencies: - npmlog "^4.1.2" + npmlog "^6.0.2" -"@lerna/get-packed@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/get-packed/-/get-packed-5.0.0.tgz#4de7f66184232c805dfca07b9a8c577f6ef02351" - integrity sha512-fks7Tg7DvcCZxRWPS3JAWVuLnwjPC/hLlNsdYmK9nN3+RtPhmYQgBjLSONcENw1E46t4Aph72lA9nLcYBLksqw== +"@lerna/get-packed@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/get-packed/-/get-packed-5.3.0.tgz#e1798e1be914f5f2b5671eba4c6a7c57e983fe46" + integrity sha512-kD12w7Ko5TThuOuPF2HBLyuPsHK3oyyWyzleGBqR4DqxMtbMRgimyTQnr5o58XBOwUPCFsv1EZiqeGk+3HTGEA== dependencies: fs-extra "^9.1.0" - ssri "^8.0.1" + ssri "^9.0.1" tar "^6.1.0" -"@lerna/github-client@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/github-client/-/github-client-5.0.0.tgz#65c984a393b1cbe35c2a707059c645bb9a03395e" - integrity sha512-NoEyRkQ8XgBnrjRfC9ph1npfg1/4OdYG+r8lG/1WkJbdt1Wlym4VNZU2BYPMWwSQYMJuppoEr0LL2uuVcS4ZUw== +"@lerna/github-client@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/github-client/-/github-client-5.3.0.tgz#45b97c0daa80ea03d8cacac841ea9474c57c2b71" + integrity sha512-UqAclsWDMthmbv3Z8QE1K7D/4e93ytg31mc+nEj+UdU+xJQ0L1ypl8zWAmGNs1sFkQntIiTIB4W5zgHet5mmZw== dependencies: - "@lerna/child-process" "5.0.0" + "@lerna/child-process" "5.3.0" "@octokit/plugin-enterprise-rest" "^6.0.1" - "@octokit/rest" "^18.1.0" - git-url-parse "^11.4.4" - npmlog "^4.1.2" + "@octokit/rest" "^19.0.3" + git-url-parse "^12.0.0" + npmlog "^6.0.2" -"@lerna/gitlab-client@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/gitlab-client/-/gitlab-client-5.0.0.tgz#c4e3d16566a3b07908ee604ce681a09c418481de" - integrity sha512-WREAT7qzta9hxNxktTX0x1/sEMpBP+4Gc00QSJYXt+ZzxY0t5RUx/ZK5pQl+IDhtkajrvXT6fSfZjMxxyE8hhQ== +"@lerna/gitlab-client@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/gitlab-client/-/gitlab-client-5.3.0.tgz#d24935717cd6fc2921f7fe73eac3dd70819bc4ce" + integrity sha512-otwbiaGDgvn5MGF1ypsCO48inMpdcxuiDlbxrKD6glPUwNHiGV+PU8LLCCDKimwjjQhl88ySLpL1oTm4jnZ1Aw== dependencies: node-fetch "^2.6.1" - npmlog "^4.1.2" + npmlog "^6.0.2" whatwg-url "^8.4.0" -"@lerna/global-options@5.0.0": - 
version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/global-options/-/global-options-5.0.0.tgz#02505c9e468188e3a254c262d58739092de93d8d" - integrity sha512-PZYy/3mTZwtA9lNmHHRCc/Ty1W20qGJ/BdDIo4bw/Bk0AOcoBCLT9b3Mjijkl4AbC9+eSGk3flUYapCGVuS32Q== +"@lerna/global-options@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/global-options/-/global-options-5.3.0.tgz#d244c6ad7d117433370818e1bbfd60cbafffd243" + integrity sha512-iEoFrDSU+KtfcB+lHW5grjg3VkEqzZNTUnWnE1FCBBwj9tSLOHjgKGtWWjIQtBUJ+qcLBbusap9Stqzr7UPYpQ== -"@lerna/has-npm-version@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/has-npm-version/-/has-npm-version-5.0.0.tgz#ed62c6ef857f068209663aae9f156f06a93dc1bd" - integrity sha512-zJPgcml86nhJFJTpT+kjkcafuCFvK7PSq3oDC2KJxwB1bhlYwy+SKtAEypHSsHQ2DwP0YgPITcy1pvtHkie1SA== +"@lerna/has-npm-version@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/has-npm-version/-/has-npm-version-5.3.0.tgz#0834cc58f1e7b9515227d79f8ebaa5af52b71bcf" + integrity sha512-A/bK8e+QP/VMqZkq1wZbyOzMz/AY92tAVsBOQ5Yw2zqshdMVj99st3YHLOqJf/HTEzQo27GGI/ajmcltHS2l6A== dependencies: - "@lerna/child-process" "5.0.0" + "@lerna/child-process" "5.3.0" semver "^7.3.4" -"@lerna/import@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/import/-/import-5.0.0.tgz#11cd83ef0fe854c512146fd4165f33519364b97a" - integrity sha512-cD+Is7eV/I+ZU0Wlg+yAgKaZbOvfzA7kBj2Qu1HtxeLhc7joTR8PFW1gNjEsvrWOTiaHAtObbo1A+MKYQ/T12g== - dependencies: - "@lerna/child-process" "5.0.0" - "@lerna/command" "5.0.0" - "@lerna/prompt" "5.0.0" - "@lerna/pulse-till-done" "5.0.0" - "@lerna/validation-error" "5.0.0" +"@lerna/import@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/import/-/import-5.3.0.tgz#9f020c3a8f486afc3ef839e6a59079411178e98c" + integrity sha512-KjVT9oFNSp1JLdrS1LSXjDcLiu2TMSfy6tpmhF9Zxo7oKB21SgWmXVV9rcWDueW2RIxNXDeVUG0NVNj2BRGeEQ== + dependencies: + "@lerna/child-process" "5.3.0" + "@lerna/command" "5.3.0" + "@lerna/prompt" "5.3.0" + "@lerna/pulse-till-done" "5.3.0" + "@lerna/validation-error" "5.3.0" dedent "^0.7.0" fs-extra "^9.1.0" p-map-series "^2.1.0" -"@lerna/info@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/info/-/info-5.0.0.tgz#649566474d0d133c22bb821f88e7d062a2beace5" - integrity sha512-k9TMK81apTjxxpnjfFOABKXndTtHBPgB8UO+I6zKhsfRqVb9FCz2MHOx8cQiSyolvNyGSQdSylSo4p7EBBomQQ== +"@lerna/info@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/info/-/info-5.3.0.tgz#7e3fe690df5bf6b6f01414561b3b31cb01528ece" + integrity sha512-pyeZSM/PIpBHCXdHPrbh6sPZlngXUxhTVFb0VaIjQ5Ms585xi15s1UQDO3FvzqdyMyalx0QGzCJbNx5XeoCejg== dependencies: - "@lerna/command" "5.0.0" - "@lerna/output" "5.0.0" + "@lerna/command" "5.3.0" + "@lerna/output" "5.3.0" envinfo "^7.7.4" -"@lerna/init@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/init/-/init-5.0.0.tgz#e35d95a4882aafb4600abf9b32fd1a0056e73ed9" - integrity sha512-2n68x7AIqVa+Vev9xF3NV9ba0C599KYf7JsIrQ5ESv4593ftInJpwgMwjroLT3X/Chi4BK7y2/xGmrfFVwgILg== +"@lerna/init@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/init/-/init-5.3.0.tgz#e1953858db749a48f7b7ebb66bf334b69db89888" + integrity sha512-y46lzEtgMdEseTJGQQqYZOjqqd7iN+e14vFh/9q5h62V4Y8nlUJRzovVo8JSeaGwKLB0B3dq3BuUn0PNywMhpA== dependencies: - "@lerna/child-process" "5.0.0" - "@lerna/command" "5.0.0" + "@lerna/child-process" "5.3.0" + "@lerna/command" "5.3.0" + "@lerna/project" "5.3.0" fs-extra "^9.1.0" p-map "^4.0.0" write-json-file 
"^4.3.0" -"@lerna/link@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/link/-/link-5.0.0.tgz#dbd5aefa0bb22f2fd9d61ee82009fb34eb946298" - integrity sha512-00YxQ06TVhQJthOjcuxCCJRjkAM+qM/8Lv0ckdCzBBCSr4RdAGBp6QcAX/gjLNasgmNpyiza3ADet7mCH7uodw== +"@lerna/link@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/link/-/link-5.3.0.tgz#3ff49118d91c0322c47e0eb7c3fc25fc16407212" + integrity sha512-+QBwnGg3S8Zk8M8G5CA4kmGq92rkEMbmWJXaxie3jQayp+GXgSlLs6R4jwSOZlztY6xR3WawMI9sHJ0Vdu+g7w== dependencies: - "@lerna/command" "5.0.0" - "@lerna/package-graph" "5.0.0" - "@lerna/symlink-dependencies" "5.0.0" + "@lerna/command" "5.3.0" + "@lerna/package-graph" "5.3.0" + "@lerna/symlink-dependencies" "5.3.0" p-map "^4.0.0" slash "^3.0.0" -"@lerna/list@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/list/-/list-5.0.0.tgz#0a979dc9c24ca176c7b4b58de80cab2dac2dcb8a" - integrity sha512-+B0yFil2AFdiYO8hyU1bFbKXGBAUUQQ43/fp2XS2jBFCipLme4eTILL5gMKOhr2Xg9AsfYPXRMRer5VW7qTeeQ== +"@lerna/list@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/list/-/list-5.3.0.tgz#c61d451ffe6054ddf5cbe5c13aba2f4b152e80c2" + integrity sha512-5RJvle3m4l2H0UmKNlwS8h2OIlNGsNTKPC4DYrJYt0+fhgzf5SEV1QKw+fuUqe3F8MziIkSGQB52HsjwPE6AWQ== dependencies: - "@lerna/command" "5.0.0" - "@lerna/filter-options" "5.0.0" - "@lerna/listable" "5.0.0" - "@lerna/output" "5.0.0" + "@lerna/command" "5.3.0" + "@lerna/filter-options" "5.3.0" + "@lerna/listable" "5.3.0" + "@lerna/output" "5.3.0" -"@lerna/listable@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/listable/-/listable-5.0.0.tgz#c1753d9375932b15c4c84cc767fffb3447b8f213" - integrity sha512-Rd5sE7KTbqA8u048qThH5IyBuJIwMcUnEObjFyJyKpc1SEWSumo4yAYmcEeN/9z62tcdud5wHYPSbVgfXJq37g== +"@lerna/listable@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/listable/-/listable-5.3.0.tgz#8817193159d46fe92ff28656791b04399812c67f" + integrity sha512-RdmeV9mDeuBOgVOlF/KNH/qttyiYwHbeqHiMAw9s9AfMo/Fz3iDZaTGZuruMm84TZSkKxI7m5mjTlC0djsyKog== dependencies: - "@lerna/query-graph" "5.0.0" + "@lerna/query-graph" "5.3.0" chalk "^4.1.0" - columnify "^1.5.4" + columnify "^1.6.0" -"@lerna/log-packed@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/log-packed/-/log-packed-5.0.0.tgz#afa35bb6a5736038d7dde039e09828ac1c4945a2" - integrity sha512-0TxKX+XnlEYj0du9U2kg3HEyIb/0QsM0Slt8utuCxALUnXRHTEKohjqVKsBdvh1QmJpnUbL5I+vfoYqno4Y42w== +"@lerna/log-packed@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/log-packed/-/log-packed-5.3.0.tgz#93ee09897f147da67beaa41ba2d86a642c53be4e" + integrity sha512-tDuOot3vSOUSP7fNNej8UM0fah5oy8mKXe026grt4J0OP4L3rhSWxhfrDBQ3Ylh2dAjgHzscUf/vpnNC9HnhOQ== dependencies: byte-size "^7.0.0" - columnify "^1.5.4" + columnify "^1.6.0" has-unicode "^2.0.1" - npmlog "^4.1.2" + npmlog "^6.0.2" -"@lerna/npm-conf@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/npm-conf/-/npm-conf-5.0.0.tgz#1364270d231d0df5ac079a9a9733ba0dd7f8c2f9" - integrity sha512-KSftxtMNVhLol1JNwFFNgh5jiCG010pewM+uKeSrUe0BCB3lnidiEDzu2CCn8JYYfIXqAiou/pScUiOxVLpcAA== +"@lerna/npm-conf@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/npm-conf/-/npm-conf-5.3.0.tgz#41b87554fba6343aeb16012d87080b85065a7073" + integrity sha512-ejlypb90tvIsKUCb0fcOKt7wcPEjLdVK2zfbNs0M+UlRDLyRVOHUVdelJ15cRDNjQHzhBo2HBUKn5Fmm/2pcmg== dependencies: config-chain "^1.1.12" pify "^5.0.0" -"@lerna/npm-dist-tag@5.0.0": - version "5.0.0" - 
resolved "https://registry.yarnpkg.com/@lerna/npm-dist-tag/-/npm-dist-tag-5.0.0.tgz#becd7fb0bd963357818c8d4fae955cc9f8885cba" - integrity sha512-ccUFhp9Wu/FHW5/5fL+vLiSTcUZXtKQ7c0RMXtNRzIdTXBxPBkVi1k5QAnBAAffsz6Owc/K++cb+/zQ/asrG3g== +"@lerna/npm-dist-tag@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/npm-dist-tag/-/npm-dist-tag-5.3.0.tgz#70c15da5d1f001e6785cf0f89b25eba4cceb2694" + integrity sha512-OPahPk9QLXQXFgtrWm22NNxajVYKavCyTh8ijMwXTGXXbMJAw+PVjokfrUuEtg7FQi+kfJSrYAcJAxxfQq2eiA== dependencies: - "@lerna/otplease" "5.0.0" - npm-package-arg "^8.1.0" - npm-registry-fetch "^9.0.0" - npmlog "^4.1.2" + "@lerna/otplease" "5.3.0" + npm-package-arg "8.1.1" + npm-registry-fetch "^13.3.0" + npmlog "^6.0.2" -"@lerna/npm-install@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/npm-install/-/npm-install-5.0.0.tgz#0ee1750bb26eae3c2b4d742d5c1f055e46d534df" - integrity sha512-72Jf05JCIdeSBWXAiNjd/y2AQH4Ojgas55ojV2sAcEYz2wgyR7wSpiI6fHBRlRP+3XPjV9MXKxI3ZwOnznQxqQ== +"@lerna/npm-install@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/npm-install/-/npm-install-5.3.0.tgz#41d76cb4b74679bd41015b460573331e2976632c" + integrity sha512-scbWo8nW+P9KfitWG3y7Ep97dOs64ECfz9xfqtjagEXKYBPxG3skvwwljkfNnuxrCNs71JVD+imvcewHzih28g== dependencies: - "@lerna/child-process" "5.0.0" - "@lerna/get-npm-exec-opts" "5.0.0" + "@lerna/child-process" "5.3.0" + "@lerna/get-npm-exec-opts" "5.3.0" fs-extra "^9.1.0" - npm-package-arg "^8.1.0" - npmlog "^4.1.2" + npm-package-arg "8.1.1" + npmlog "^6.0.2" signal-exit "^3.0.3" write-pkg "^4.0.0" -"@lerna/npm-publish@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/npm-publish/-/npm-publish-5.0.0.tgz#a1a06e47e45e56999c85086a40f9b77f801b5a00" - integrity sha512-jnapZ2jRajSzshSfd1Y3rHH5R7QC+JJlYST04FBebIH3VePwDT7uAglDCI4um2THvxkW4420EzE4BUMUwKlnXA== +"@lerna/npm-publish@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/npm-publish/-/npm-publish-5.3.0.tgz#b53f47d441a2f776ded6af045a02f42cf06f1f26" + integrity sha512-n+ocN1Dxrs6AmrSNqZl57cwhP4/VjQXdEI+QYauNnErNjMQW8Wt+tNaTlVAhZ1DnorwAo86o2uzFF/BgdUqh9A== dependencies: - "@lerna/otplease" "5.0.0" - "@lerna/run-lifecycle" "5.0.0" + "@lerna/otplease" "5.3.0" + "@lerna/run-lifecycle" "5.3.0" fs-extra "^9.1.0" - libnpmpublish "^4.0.0" - npm-package-arg "^8.1.0" - npmlog "^4.1.2" + libnpmpublish "^6.0.4" + npm-package-arg "8.1.1" + npmlog "^6.0.2" pify "^5.0.0" - read-package-json "^3.0.0" + read-package-json "^5.0.1" -"@lerna/npm-run-script@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/npm-run-script/-/npm-run-script-5.0.0.tgz#114374b89f228c9719bbfacf9f08d6aac2739fb2" - integrity sha512-qgGf0Wc/E2YxPwIiF8kC/OB9ffPf0/HVtPVkqrblVuNE9XVP80WilOH966PIDiXzwXaCo/cTswFoBeseccYRGw== +"@lerna/npm-run-script@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/npm-run-script/-/npm-run-script-5.3.0.tgz#28745ec962398ab864837155e9b0732aa119071f" + integrity sha512-2cLR1YdzeMjaMKgDuwHE+iZgVPt+Ttzb3/wFtp7Mw9TlKmNIdbHdrnfl12ABz5knPC+62CCNjB/gznfLndPp2w== dependencies: - "@lerna/child-process" "5.0.0" - "@lerna/get-npm-exec-opts" "5.0.0" - npmlog "^4.1.2" + "@lerna/child-process" "5.3.0" + "@lerna/get-npm-exec-opts" "5.3.0" + npmlog "^6.0.2" -"@lerna/otplease@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/otplease/-/otplease-5.0.0.tgz#5b0419f64908d7ad840c2735e0284d67cd37095b" - integrity 
sha512-QLLkEy1DPN1XFRAAZDHxAD26MHFQDHfzB6KKSzRYxbHc6lH/YbDaMH1RloSWIm7Hwkxl/3NgpokgN4Lj5XFuzg== +"@lerna/otplease@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/otplease/-/otplease-5.3.0.tgz#96b4bd0c31387811684fdedc33465a548927fddf" + integrity sha512-Xpju2VC5TiycmBP/mdp9hRstkH2MLm8/7o2NotVTCJwASWdKphRMqezhh5BX0E9i6VyrjzmTqSYEh9FNZZ9MwQ== dependencies: - "@lerna/prompt" "5.0.0" + "@lerna/prompt" "5.3.0" -"@lerna/output@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/output/-/output-5.0.0.tgz#f3712f0cad3e9ef73c803fe368f6a9ac20403868" - integrity sha512-/7sUJQWPcvnLudjVIdN7t9MlfBLuP4JCDAWgQMqZe+wpQRuKNyKQ5dLBH5NHU/ElJCjAwMPfWuk3mh3GuvuiGA== +"@lerna/output@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/output/-/output-5.3.0.tgz#bfcf7d6ada32d3b94655c39441f6aba36fc60012" + integrity sha512-fISmHDu/9PKInFmT5NXsbh8cR6aE6SUXWrteXJ6PBYK30s0f/pVcfswb9VccX0Yea8HmqMQgCHWUWifkZeXiRA== dependencies: - npmlog "^4.1.2" + npmlog "^6.0.2" -"@lerna/pack-directory@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/pack-directory/-/pack-directory-5.0.0.tgz#f277418545786ca68ca15647bab52ad29bd57f59" - integrity sha512-E1SNDS7xSWhJrTSmRzJK7DibneljrymviKcsZW3mRl4TmF4CpYJmNXCMlhEtKEy6ghnGQvnl3/4+eslHDJ5J/w== - dependencies: - "@lerna/get-packed" "5.0.0" - "@lerna/package" "5.0.0" - "@lerna/run-lifecycle" "5.0.0" - "@lerna/temp-write" "5.0.0" - npm-packlist "^2.1.4" - npmlog "^4.1.2" +"@lerna/pack-directory@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/pack-directory/-/pack-directory-5.3.0.tgz#043c45b5e825dc002c3de21f00be3b192bd12b0d" + integrity sha512-dTGMUB6/GjExhmLZ8yeFaRKJuSm6M/IsfxSJdL4gFPLigUIAS4XhzXS3KnL0+Ef1ue1yaTlAE9c/czfkE0pc/w== + dependencies: + "@lerna/get-packed" "5.3.0" + "@lerna/package" "5.3.0" + "@lerna/run-lifecycle" "5.3.0" + "@lerna/temp-write" "5.3.0" + npm-packlist "^5.1.1" + npmlog "^6.0.2" tar "^6.1.0" -"@lerna/package-graph@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/package-graph/-/package-graph-5.0.0.tgz#53e88ef46359ef7a2f6e3b7c5bab82302a10653f" - integrity sha512-Z3QeUQVjux0Blo64rA3/NivoLDlsQBjsZRIgGLbcQh7l7pJrqLK1WyNCBbPJ0KQNljQqUXthCKzdefnEWe37Ew== +"@lerna/package-graph@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/package-graph/-/package-graph-5.3.0.tgz#6a8e87ce55628d2daef31f317d7916fc05274210" + integrity sha512-UEHY7l/yknwFvQgo0RifyY+B5QdzuFutLZYSN1BMmyWttOZD9rkM263qnLNGTZ2BUE4dXDwwwOHuhLvi+xDRsA== dependencies: - "@lerna/prerelease-id-from-version" "5.0.0" - "@lerna/validation-error" "5.0.0" - npm-package-arg "^8.1.0" - npmlog "^4.1.2" + "@lerna/prerelease-id-from-version" "5.3.0" + "@lerna/validation-error" "5.3.0" + npm-package-arg "8.1.1" + npmlog "^6.0.2" semver "^7.3.4" -"@lerna/package@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/package/-/package-5.0.0.tgz#4beeb3a1e8eed6e7ae9cebca283c7684278cdd28" - integrity sha512-/JiUU88bhbYEUTzPqoGLGwrrdWWTIVMlBb1OPxCGNGDEqYYNySX+OTTSs3zGMcmJnRNI0UyQALiEd0sh3JFN5w== +"@lerna/package@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/package/-/package-5.3.0.tgz#8985035bfdaa91b99b855b9d1abb86aa9cc2cc74" + integrity sha512-hsB03miiaNdvZ/UGzl0sVqxVat5x33EG9JiYgIoFqzroQPrG+WShmX3ctuO06TY1pxb4iNuHLPIbQomHEzzj8w== dependencies: load-json-file "^6.2.0" - npm-package-arg "^8.1.0" + npm-package-arg "8.1.1" write-pkg "^4.0.0" -"@lerna/prerelease-id-from-version@5.0.0": - version "5.0.0" - resolved 
"https://registry.yarnpkg.com/@lerna/prerelease-id-from-version/-/prerelease-id-from-version-5.0.0.tgz#3edb90ba9ceace97708d03ff9f650d177f973184" - integrity sha512-bUZwyx6evRn2RxogOQXaiYxRK1U/1Mh/KLO4n49wUhqb8S8Vb9aG3+7lLOgg4ZugHpj9KAlD3YGEKvwYQiWzhg== +"@lerna/prerelease-id-from-version@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/prerelease-id-from-version/-/prerelease-id-from-version-5.3.0.tgz#dc806da65600458c5567728e18a1b29053d9fd10" + integrity sha512-o1wsLns6hFTsmk4iqTRJNWLnFzlBBwgu17hp8T2iU4U7LUlDT2ZSKV3smGAU6GfrwX3MAp4LZ5syxgjFjrUOnw== dependencies: semver "^7.3.4" -"@lerna/profiler@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/profiler/-/profiler-5.0.0.tgz#e1b74d17dbd6172b5ce9c80426b336bf6ab2e8e9" - integrity sha512-hFX+ZtoH7BdDoGI+bqOYaSptJTFI58wNK9qq/pHwL5ksV7vOhxP2cQAuo1SjgBKHGl0Ex/9ZT080YVV4jP1ehw== +"@lerna/profiler@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/profiler/-/profiler-5.3.0.tgz#42db1b4e62de7a030db3af86175ebf16f7d92533" + integrity sha512-LEZYca29EPgZR0q5E+7CJkn25Cw3OxNMQJU/CVn/HGeoWYWOpoDxujrZBl8is2bw06LHXvRbVXEUATLc+ACbqQ== dependencies: fs-extra "^9.1.0" - npmlog "^4.1.2" + npmlog "^6.0.2" upath "^2.0.1" -"@lerna/project@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/project/-/project-5.0.0.tgz#31672891236696b2a70226388de0300c6086d75f" - integrity sha512-+izHk7D/Di2b0s69AzKzAa/qBz32H9s67oN9aKntrjNylpY7iN5opU157l60Kh4TprYHU5bLisqzFLZsHHADGw== +"@lerna/project@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/project/-/project-5.3.0.tgz#1727a81f4b945b491dfed5d1a0ed2ea3dc3329cc" + integrity sha512-InhIo9uwT1yod72ai5SKseJSUk8KkqG6COmwp1/45vibbawb7ZLbokpns7n46A0NdGNlmwJolamybYOuyumejw== dependencies: - "@lerna/package" "5.0.0" - "@lerna/validation-error" "5.0.0" + "@lerna/package" "5.3.0" + "@lerna/validation-error" "5.3.0" cosmiconfig "^7.0.0" dedent "^0.7.0" dot-prop "^6.0.1" glob-parent "^5.1.1" globby "^11.0.2" load-json-file "^6.2.0" - npmlog "^4.1.2" + npmlog "^6.0.2" p-map "^4.0.0" resolve-from "^5.0.0" write-json-file "^4.3.0" -"@lerna/prompt@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/prompt/-/prompt-5.0.0.tgz#31d3d82ecd17e863f8b7cc7944accff4f3de3395" - integrity sha512-cq2k04kOPY1yuJNHJn4qfBDDrCi9PF4Q228JICa6bxaONRf/C/TRsEQXHVIdlax8B3l53LnlGv5GECwRuvkQbA== +"@lerna/prompt@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/prompt/-/prompt-5.3.0.tgz#0565cdbb092e71d8e2ce4a18a8c44db3c5ff7c17" + integrity sha512-4bIusBdjpw665CJtFsVsaB55hLHnmKnrcOaRjna6N/MdJDl8Th6X4EM4rrfXTX/uUNR3XcV91lYqcLuLmrpm5w== dependencies: - inquirer "^7.3.3" - npmlog "^4.1.2" + inquirer "^8.2.4" + npmlog "^6.0.2" -"@lerna/publish@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/publish/-/publish-5.0.0.tgz#27c4c469e6abd5b52e977568d328632929e859b1" - integrity sha512-QEWFtN8fW1M+YXEQOWb2XBBCT137CrwHYK29ojMXW9HShvSZezf8Q/niH91nZ4kIhWdpOGz4w3rKopsumAM5SA== - dependencies: - "@lerna/check-working-tree" "5.0.0" - "@lerna/child-process" "5.0.0" - "@lerna/collect-updates" "5.0.0" - "@lerna/command" "5.0.0" - "@lerna/describe-ref" "5.0.0" - "@lerna/log-packed" "5.0.0" - "@lerna/npm-conf" "5.0.0" - "@lerna/npm-dist-tag" "5.0.0" - "@lerna/npm-publish" "5.0.0" - "@lerna/otplease" "5.0.0" - "@lerna/output" "5.0.0" - "@lerna/pack-directory" "5.0.0" - "@lerna/prerelease-id-from-version" "5.0.0" - "@lerna/prompt" "5.0.0" - "@lerna/pulse-till-done" "5.0.0" - "@lerna/run-lifecycle" "5.0.0" - 
"@lerna/run-topologically" "5.0.0" - "@lerna/validation-error" "5.0.0" - "@lerna/version" "5.0.0" +"@lerna/publish@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/publish/-/publish-5.3.0.tgz#136af3be2c0779a9994aa6fbc0d24fb15438c68e" + integrity sha512-T8T1BQdI+NnlVARKwIXzILknEuiQlZToBsDpuX06M7+45t/pp9Z+u6pVt3rrqwiUPZ/dpoZzYKI31YdNJtGMcQ== + dependencies: + "@lerna/check-working-tree" "5.3.0" + "@lerna/child-process" "5.3.0" + "@lerna/collect-updates" "5.3.0" + "@lerna/command" "5.3.0" + "@lerna/describe-ref" "5.3.0" + "@lerna/log-packed" "5.3.0" + "@lerna/npm-conf" "5.3.0" + "@lerna/npm-dist-tag" "5.3.0" + "@lerna/npm-publish" "5.3.0" + "@lerna/otplease" "5.3.0" + "@lerna/output" "5.3.0" + "@lerna/pack-directory" "5.3.0" + "@lerna/prerelease-id-from-version" "5.3.0" + "@lerna/prompt" "5.3.0" + "@lerna/pulse-till-done" "5.3.0" + "@lerna/run-lifecycle" "5.3.0" + "@lerna/run-topologically" "5.3.0" + "@lerna/validation-error" "5.3.0" + "@lerna/version" "5.3.0" fs-extra "^9.1.0" - libnpmaccess "^4.0.1" - npm-package-arg "^8.1.0" - npm-registry-fetch "^9.0.0" - npmlog "^4.1.2" + libnpmaccess "^6.0.3" + npm-package-arg "8.1.1" + npm-registry-fetch "^13.3.0" + npmlog "^6.0.2" p-map "^4.0.0" p-pipe "^3.1.0" - pacote "^13.4.1" + pacote "^13.6.1" semver "^7.3.4" -"@lerna/pulse-till-done@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/pulse-till-done/-/pulse-till-done-5.0.0.tgz#df3c32c2d7457362956d997da366f5c060953eef" - integrity sha512-qFeVybGIZbQSWKasWIzZmHsvCQMC/AwTz5B44a0zTt5eSNQuI65HRpKKUgmFFu/Jzd7u+yp7eP+NQ53gjOcQlQ== +"@lerna/pulse-till-done@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/pulse-till-done/-/pulse-till-done-5.3.0.tgz#6342a2ceb915597e909fea30769d0afc55e70524" + integrity sha512-yNvSuPLT1ZTtD2LMVOmiDhw4+9qkyf6xCpfxiUp4cGEN+qIuazWB5JicKLE49o27DBdaG8Ao4lAlb16x/gNrwQ== dependencies: - npmlog "^4.1.2" + npmlog "^6.0.2" -"@lerna/query-graph@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/query-graph/-/query-graph-5.0.0.tgz#76c45f648915ef5c884c32c3d35daa3ebb53440b" - integrity sha512-C/HXssBI8DVsZ/7IDW6JG9xhoHtWywi3L5oZB9q84MBYpQ9otUv6zbB+K4JCj7w9WHcuFWe2T/mc9wsaFuvB5g== +"@lerna/query-graph@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/query-graph/-/query-graph-5.3.0.tgz#596f7827b7d0ac9d1217ac5ab6d9e62ba5388a2c" + integrity sha512-t99lNj97/Vilp5Js1Be7MoyaZ5U0fbOFh0E7lnTfSLvZhTkPMK6xLvAx2M3NQqhwYCQjTFDuf9ozQ3HQtYZAmA== dependencies: - "@lerna/package-graph" "5.0.0" + "@lerna/package-graph" "5.3.0" -"@lerna/resolve-symlink@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/resolve-symlink/-/resolve-symlink-5.0.0.tgz#edff89908e90a390791ab762305d34aa95e7bdbe" - integrity sha512-O1EMQh3O3nKjLyI2guCCaxmi9xzZXpiMZhrz2ki5ENEDB2N1+f7cZ2THT0lEOIkLRuADI6hrzoN1obJ+TTk+KQ== +"@lerna/resolve-symlink@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/resolve-symlink/-/resolve-symlink-5.3.0.tgz#6150b65905910fc34fce6c781516b89c853c394e" + integrity sha512-zKI7rV5FzzlMBfi6kjDS0ulzcdDTORvdOJ/+CHU5C2h+v+P64Nk2VhZZNCCBDoO/l4GRhgehZOB70GIamO1TSw== dependencies: fs-extra "^9.1.0" - npmlog "^4.1.2" - read-cmd-shim "^2.0.0" + npmlog "^6.0.2" + read-cmd-shim "^3.0.0" -"@lerna/rimraf-dir@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/rimraf-dir/-/rimraf-dir-5.0.0.tgz#9e7689610415e6d68c9e766a462c8acfdbf04b9a" - integrity 
sha512-hWJg/13CiSUrWWEek3B/A1mkvBbcPvG5z69/Ugyerdpzlw44ubf02MAZ0/kXPJjkICI2hMrS07YotQ60LdYpCw== +"@lerna/rimraf-dir@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/rimraf-dir/-/rimraf-dir-5.3.0.tgz#765855a30d68f62b1af993e644e4d5f4224bfdb4" + integrity sha512-/QJebh0tSY3LjgEyOo+6NH/b7ZNw9IpjqiDtvnLixjtdfkgli1OKOoZTa4KrO0mJoqMRq4yAa98cjpIzyKqCqw== dependencies: - "@lerna/child-process" "5.0.0" - npmlog "^4.1.2" + "@lerna/child-process" "5.3.0" + npmlog "^6.0.2" path-exists "^4.0.0" rimraf "^3.0.2" -"@lerna/run-lifecycle@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/run-lifecycle/-/run-lifecycle-5.0.0.tgz#0f62c2faebc19e4ee247bdfa1e05b2a9f51b0637" - integrity sha512-36mAm9rC5DSliFShI0Y4ICjgrJXdIIVt7VW9rdbdJ8/XYjRHDzhGPB9Sc1neJOVlGL4DmaArvh5tGgo62KPJYQ== +"@lerna/run-lifecycle@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/run-lifecycle/-/run-lifecycle-5.3.0.tgz#e884e4c5503bc7431ddec2bb457d74f0817312ad" + integrity sha512-EuBCGwm2PLgkebfyqo3yNkwfSb1EzHeo3lA8t4yld6LXWkgUPBFhc7RwRc6TsQOpjpfFvDSGoI282R01o0jPVQ== dependencies: - "@lerna/npm-conf" "5.0.0" - "@npmcli/run-script" "^3.0.2" - npmlog "^4.1.2" + "@lerna/npm-conf" "5.3.0" + "@npmcli/run-script" "^4.1.7" + npmlog "^6.0.2" + p-queue "^6.6.2" -"@lerna/run-topologically@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/run-topologically/-/run-topologically-5.0.0.tgz#0b0156e3ebe2bf768b9ba1339e02e947e70d1dd1" - integrity sha512-B2s1N/+r3sfPOLRA2svNk+C52JpXQleMuGap0yhOx5mZzR1M2Lo4vpe9Ody4hCvXQjfdLx/U342fxVmgugUtfQ== +"@lerna/run-topologically@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/run-topologically/-/run-topologically-5.3.0.tgz#4080a499d73c0e592331e55b219ea46a4485958f" + integrity sha512-WiFF2EiwLjAguKs0lEmcukTL7WhuWFwxNprrGWFxEkBhlGdMFk18n8BaZN8FO26xqzztzuPzSx1re/f/dEEAPg== dependencies: - "@lerna/query-graph" "5.0.0" + "@lerna/query-graph" "5.3.0" p-queue "^6.6.2" -"@lerna/run@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/run/-/run-5.0.0.tgz#3af69d1a787866cf85072a0ae9571b9c3bf262e7" - integrity sha512-8nBZstqKSO+7wHlKk1g+iexSYRVVNJq/u5ZbAzBiHNrABtqA6/0G7q9vsAEMsnPZ8ARAUYpwvbfKTipjpWH0VA== - dependencies: - "@lerna/command" "5.0.0" - "@lerna/filter-options" "5.0.0" - "@lerna/npm-run-script" "5.0.0" - "@lerna/output" "5.0.0" - "@lerna/profiler" "5.0.0" - "@lerna/run-topologically" "5.0.0" - "@lerna/timer" "5.0.0" - "@lerna/validation-error" "5.0.0" +"@lerna/run@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/run/-/run-5.3.0.tgz#628395f0aaf28714d002cceeb96d4a3903965043" + integrity sha512-KwoKTj1w71OmUHONNYhZME+tr5lk9Q4f+3LUr2WtWZRuOAGO5ZCRrcZc+N4Ib7zno89Ub6Ovz51fcjwltLh72w== + dependencies: + "@lerna/command" "5.3.0" + "@lerna/filter-options" "5.3.0" + "@lerna/npm-run-script" "5.3.0" + "@lerna/output" "5.3.0" + "@lerna/profiler" "5.3.0" + "@lerna/run-topologically" "5.3.0" + "@lerna/timer" "5.3.0" + "@lerna/validation-error" "5.3.0" p-map "^4.0.0" -"@lerna/symlink-binary@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/symlink-binary/-/symlink-binary-5.0.0.tgz#f9da5673ed3a44570fa4d2e691759f82bd7ad057" - integrity sha512-uYyiiNjkdL1tWf8MDXIIyCa/a2gmYaUxagqMgEZ4wRtOk+PDypDwMUFVop/EQtUWZqG5CAJBJYOztG3DdapTbA== +"@lerna/symlink-binary@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/symlink-binary/-/symlink-binary-5.3.0.tgz#21aeeff1ed8c8b611d1c722292c31d8344f34262" + integrity 
sha512-dIATASuGS6y512AGjacOoTpkFDPsKlhggjzL3KLdSNmxV3288nUqaFBuA7rTnnMNnBQ7jVuE1JKJupZnzPN0cA== dependencies: - "@lerna/create-symlink" "5.0.0" - "@lerna/package" "5.0.0" + "@lerna/create-symlink" "5.3.0" + "@lerna/package" "5.3.0" fs-extra "^9.1.0" p-map "^4.0.0" -"@lerna/symlink-dependencies@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/symlink-dependencies/-/symlink-dependencies-5.0.0.tgz#878b0f52737f82bb7014e13afda8efc606fc071c" - integrity sha512-wlZGOOB87XMy278hpF4fOwGNnjTXf1vJ/cFHIdKsJAiDipyhtnuCiJLBDPh4NzEGb02o4rhaqt8Nl5yWRu9CNA== +"@lerna/symlink-dependencies@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/symlink-dependencies/-/symlink-dependencies-5.3.0.tgz#ece40a7767d946c5438563fe60579418acd01768" + integrity sha512-qkq4YT/Bdrb3W22ve+d2Gy3hRTrtT/zBhjKTCukEpYsFJLwSjZ4z5vbv6J15/j6PN1Km9oTRp6vBYmdjAuARQQ== dependencies: - "@lerna/create-symlink" "5.0.0" - "@lerna/resolve-symlink" "5.0.0" - "@lerna/symlink-binary" "5.0.0" + "@lerna/create-symlink" "5.3.0" + "@lerna/resolve-symlink" "5.3.0" + "@lerna/symlink-binary" "5.3.0" fs-extra "^9.1.0" p-map "^4.0.0" p-map-series "^2.1.0" -"@lerna/temp-write@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/temp-write/-/temp-write-5.0.0.tgz#44f8c7c82f498e15db33c166d063be117b819162" - integrity sha512-JOkRR6xyASuBy1udyS/VD52Wgywnz7cSKppD+QKIDseNzTq27I9mNmb702BSXNXIdD19lLVQ7q6WoAlpnelnZg== +"@lerna/temp-write@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/temp-write/-/temp-write-5.3.0.tgz#6c926ad21c6b1932ead202e735d3cc8a5322e4e6" + integrity sha512-AhC5Q+tV0yebEc1P2jsB4apQzztW8dgdLLc1G1Pkt46l5vezRGhZmsj+iUyCsVjpdUSO/UcAq1DbI2Xzhf5arg== dependencies: graceful-fs "^4.1.15" is-stream "^2.0.0" @@ -1293,42 +1311,42 @@ temp-dir "^1.0.0" uuid "^8.3.2" -"@lerna/timer@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/timer/-/timer-5.0.0.tgz#ab8fba29f90de21b0eb02406916269122deb2e41" - integrity sha512-p2vevkpB6V/b0aR8VyMLDfg0Arp9VvMxcZOEu+IfZ9XKTtnbwjWPHKUOS34x/VGa6bnOIWjE046ixWymOs/fTw== +"@lerna/timer@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/timer/-/timer-5.3.0.tgz#b3da6c71bb37eb313cf30d333eb7f0d841976e55" + integrity sha512-IeDjj1gJtbUPKl2ebpiml9u4k2kRqYF1Dbs6JuWpeC7lGxAx3JcUmkNH2RQ1BYTxk5xc9FKlgNMrZQwhq2K1Ow== -"@lerna/validation-error@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/validation-error/-/validation-error-5.0.0.tgz#3d3557023e3eb2fd3d8fc9c89f7352a1b6e5bd3e" - integrity sha512-fu/MhqRXiRQM2cirP/HoSkfwc5XtJ21G60WHv74RnanKBqWEZAUALWa3MQN2sYhVV/FpDW3GLkO008IW5NWzdg== +"@lerna/validation-error@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/validation-error/-/validation-error-5.3.0.tgz#21c2054079ab997cd9ec8fa6fde5685d5fda68a9" + integrity sha512-GVvnTxx+CNFjXCiJahAu2c/pP2R3DhGuQp4CJUyKegnzGaWK0h5PhlwRL7/LbDMPLh2zLobPOVr9kTOjwv76Nw== dependencies: - npmlog "^4.1.2" + npmlog "^6.0.2" -"@lerna/version@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/version/-/version-5.0.0.tgz#36a808e8b4458febd58a6b76852f2ce30e740ca1" - integrity sha512-M8KvdyG5kR/d3wgg5S46Q2YMf0L9iw9MiumTvlDP4ckysTt+04kS74Vp4+aClgPM4xaoI5OuMrs6wy5ICcd3Pw== - dependencies: - "@lerna/check-working-tree" "5.0.0" - "@lerna/child-process" "5.0.0" - "@lerna/collect-updates" "5.0.0" - "@lerna/command" "5.0.0" - "@lerna/conventional-commits" "5.0.0" - "@lerna/github-client" "5.0.0" - "@lerna/gitlab-client" "5.0.0" - "@lerna/output" "5.0.0" - 
"@lerna/prerelease-id-from-version" "5.0.0" - "@lerna/prompt" "5.0.0" - "@lerna/run-lifecycle" "5.0.0" - "@lerna/run-topologically" "5.0.0" - "@lerna/temp-write" "5.0.0" - "@lerna/validation-error" "5.0.0" +"@lerna/version@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/version/-/version-5.3.0.tgz#011d7e1fd6f286186c6c216737249fccedd8b2df" + integrity sha512-QOQSAdpeP66oQQ20nNZ4NhJS5NtZZDGyz36kP/4BeqjGK6QgtrEmto4+vmWj49w3VJUIXnrqAKHiPkhFUmJm5Q== + dependencies: + "@lerna/check-working-tree" "5.3.0" + "@lerna/child-process" "5.3.0" + "@lerna/collect-updates" "5.3.0" + "@lerna/command" "5.3.0" + "@lerna/conventional-commits" "5.3.0" + "@lerna/github-client" "5.3.0" + "@lerna/gitlab-client" "5.3.0" + "@lerna/output" "5.3.0" + "@lerna/prerelease-id-from-version" "5.3.0" + "@lerna/prompt" "5.3.0" + "@lerna/run-lifecycle" "5.3.0" + "@lerna/run-topologically" "5.3.0" + "@lerna/temp-write" "5.3.0" + "@lerna/validation-error" "5.3.0" chalk "^4.1.0" dedent "^0.7.0" load-json-file "^6.2.0" minimatch "^3.0.4" - npmlog "^4.1.2" + npmlog "^6.0.2" p-map "^4.0.0" p-pipe "^3.1.0" p-reduce "^2.1.0" @@ -1337,13 +1355,13 @@ slash "^3.0.0" write-json-file "^4.3.0" -"@lerna/write-log-file@5.0.0": - version "5.0.0" - resolved "https://registry.yarnpkg.com/@lerna/write-log-file/-/write-log-file-5.0.0.tgz#ad3d33d6153b962beef48442ab6472233b5d5197" - integrity sha512-kpPNxe9xm36QbCWY7DwO96Na6FpCHzZinJtw6ttBHslIcdR38lZuCp+/2KfJcVsRIPNOsp1VvgP7EZIKiBhgjw== +"@lerna/write-log-file@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@lerna/write-log-file/-/write-log-file-5.3.0.tgz#3aa6621c56f020e642c5c3965a33771111d14f52" + integrity sha512-cmrNAI5+9auUJSuTVrUzt2nb/KX6htgjdw7gGPMI1Tm6cdBIbs67R6LedZ8yvYOLGsXB2Se93vxv5fTgEHWfCw== dependencies: - npmlog "^4.1.2" - write-file-atomic "^3.0.3" + npmlog "^6.0.2" + write-file-atomic "^4.0.1" "@mattiasbuelens/web-streams-adapter@~0.1.0": version "0.1.0" @@ -1371,10 +1389,10 @@ "@nodelib/fs.scandir" "2.1.5" fastq "^1.6.0" -"@npmcli/arborist@5.2.0": - version "5.2.0" - resolved "https://registry.yarnpkg.com/@npmcli/arborist/-/arborist-5.2.0.tgz#ee40dfe1f81ae1524819ee39c8f3e7022b0d6269" - integrity sha512-zWV7scFGL0SmpvfQyIWnMFbU/0YgtMNyvJiJwR98kyjUSntJGWFFR0O600d5W+TrDcTg0GyDbY+HdzGEg+GXLg== +"@npmcli/arborist@5.3.0": + version "5.3.0" + resolved "https://registry.yarnpkg.com/@npmcli/arborist/-/arborist-5.3.0.tgz#321d9424677bfc08569e98a5ac445ee781f32053" + integrity sha512-+rZ9zgL1lnbl8Xbb1NQdMjveOMwj4lIYfcDtyJHHi5x4X8jtR6m8SXooJMZy5vmFVZ8w7A2Bnd/oX9eTuU8w5A== dependencies: "@isaacs/string-locale-compare" "^1.1.0" "@npmcli/installed-package-contents" "^1.0.7" @@ -1384,7 +1402,7 @@ "@npmcli/name-from-folder" "^1.0.1" "@npmcli/node-gyp" "^2.0.0" "@npmcli/package-json" "^2.0.0" - "@npmcli/run-script" "^3.0.0" + "@npmcli/run-script" "^4.1.3" bin-links "^3.0.0" cacache "^16.0.6" common-ancestor-path "^1.0.1" @@ -1398,7 +1416,7 @@ npm-pick-manifest "^7.0.0" npm-registry-fetch "^13.0.0" npmlog "^6.0.2" - pacote "^13.0.5" + pacote "^13.6.1" parse-conflict-json "^2.0.1" proc-log "^2.0.0" promise-all-reject-late "^1.0.0" @@ -1411,23 +1429,10 @@ treeverse "^2.0.0" walk-up-path "^1.0.0" -"@npmcli/ci-detect@^1.0.0": - version "1.4.0" - resolved "https://registry.yarnpkg.com/@npmcli/ci-detect/-/ci-detect-1.4.0.tgz#18478bbaa900c37bfbd8a2006a6262c62e8b0fe1" - integrity sha512-3BGrt6FLjqM6br5AhWRKTr3u5GIVkjRYeAFrMp3HjnfICrg4xOrVRwFavKT6tsp++bq5dluL5t8ME/Nha/6c1Q== - -"@npmcli/fs@^1.0.0": - version "1.1.1" - resolved 
"https://registry.yarnpkg.com/@npmcli/fs/-/fs-1.1.1.tgz#72f719fe935e687c56a4faecf3c03d06ba593257" - integrity sha512-8KG5RD0GVP4ydEzRn/I4BNDuxDtqVbOdm8675T49OIG/NGhaK0pjPX7ZcDlvKYbA+ulvVK3ztfcF4uBdOxuJbQ== - dependencies: - "@gar/promisify" "^1.0.1" - semver "^7.3.5" - "@npmcli/fs@^2.1.0": - version "2.1.0" - resolved "https://registry.yarnpkg.com/@npmcli/fs/-/fs-2.1.0.tgz#f2a21c28386e299d1a9fae8051d35ad180e33109" - integrity sha512-DmfBvNXGaetMxj9LTp8NAN9vEidXURrf5ZTslQzEAi/6GbW+4yjaLFQc6Tue5cpZ9Frlk4OBo/Snf1Bh/S7qTQ== + version "2.1.1" + resolved "https://registry.yarnpkg.com/@npmcli/fs/-/fs-2.1.1.tgz#c0c480b03450d8b9fc086816a50cb682668a48bf" + integrity sha512-1Q0uzx6c/NVNGszePbr5Gc2riSU1zLpNlo/1YWntH+eaPmMgBssAW0qXofCVkpdj3ce4swZtlDYQu+NKiYcptg== dependencies: "@gar/promisify" "^1.1.3" semver "^7.3.5" @@ -1456,9 +1461,9 @@ npm-normalize-package-bin "^1.0.1" "@npmcli/map-workspaces@^2.0.3": - version "2.0.3" - resolved "https://registry.yarnpkg.com/@npmcli/map-workspaces/-/map-workspaces-2.0.3.tgz#2d3c75119ee53246e9aa75bc469a55281cd5f08f" - integrity sha512-X6suAun5QyupNM8iHkNPh0AHdRC2rb1W+MTdMvvA/2ixgmqZwlq5cGUBgmKHUHT2LgrkKJMAXbfAoTxOigpK8Q== + version "2.0.4" + resolved "https://registry.yarnpkg.com/@npmcli/map-workspaces/-/map-workspaces-2.0.4.tgz#9e5e8ab655215a262aefabf139782b894e0504fc" + integrity sha512-bMo0aAfwhVwqoVM5UzX1DJnlvVvzDCHae821jv48L1EsrYwfOZChlqWYXEtto/+BkBXetPbEWgau++/brh4oVg== dependencies: "@npmcli/name-from-folder" "^1.0.1" glob "^8.0.1" @@ -1466,23 +1471,15 @@ read-package-json-fast "^2.0.3" "@npmcli/metavuln-calculator@^3.0.1": - version "3.1.0" - resolved "https://registry.yarnpkg.com/@npmcli/metavuln-calculator/-/metavuln-calculator-3.1.0.tgz#b1c2f0991c4f2d992b1615a54d4358c05efc3702" - integrity sha512-Q5fbQqGDlYqk7kWrbg6E2j/mtqQjZop0ZE6735wYA1tYNHguIDjAuWs+kFb5rJCkLIlXllfapvsyotYKiZOTBA== + version "3.1.1" + resolved "https://registry.yarnpkg.com/@npmcli/metavuln-calculator/-/metavuln-calculator-3.1.1.tgz#9359bd72b400f8353f6a28a25c8457b562602622" + integrity sha512-n69ygIaqAedecLeVH3KnO39M6ZHiJ2dEv5A7DGvcqCB8q17BGUgW8QaanIkbWUo2aYGZqJaOORTLAlIvKjNDKA== dependencies: cacache "^16.0.0" json-parse-even-better-errors "^2.3.1" pacote "^13.0.3" semver "^7.3.5" -"@npmcli/move-file@^1.0.1": - version "1.1.2" - resolved "https://registry.yarnpkg.com/@npmcli/move-file/-/move-file-1.1.2.tgz#1a82c3e372f7cae9253eb66d72543d6b8685c674" - integrity sha512-1SUf/Cg2GzGDyaf15aR9St9TWlb+XvbZXWpDx8YKs7MLzMH/BCeopv+y9vzrzgkfykCGuWOlSu3mZhj2+FQcrg== - dependencies: - mkdirp "^1.0.4" - rimraf "^3.0.2" - "@npmcli/move-file@^2.0.0": version "2.0.0" resolved "https://registry.yarnpkg.com/@npmcli/move-file/-/move-file-2.0.0.tgz#417f585016081a0184cef3e38902cd917a9bbd02" @@ -1515,130 +1512,153 @@ dependencies: infer-owner "^1.0.4" -"@npmcli/run-script@^3.0.0", "@npmcli/run-script@^3.0.1", "@npmcli/run-script@^3.0.2": - version "3.0.3" - resolved "https://registry.yarnpkg.com/@npmcli/run-script/-/run-script-3.0.3.tgz#66afa6e0c4c3484056195f295fa6c1d1a45ddf58" - integrity sha512-ZXL6qgC5NjwfZJ2nET+ZSLEz/PJgJ/5CU90C2S66dZY4Jw73DasS4ZCXuy/KHWYP0imjJ4VtA+Gebb5BxxKp9Q== +"@npmcli/run-script@^4.1.0", "@npmcli/run-script@^4.1.3", "@npmcli/run-script@^4.1.7": + version "4.2.0" + resolved "https://registry.yarnpkg.com/@npmcli/run-script/-/run-script-4.2.0.tgz#2c25758f80831ba138afe25225d456e89acedac3" + integrity sha512-e/QgLg7j2wSJp1/7JRl0GC8c7PMX+uYlA/1Tb+IDOLdSM4T7K1VQ9mm9IGU3WRtY5vEIObpqCLb3aCNCug18DA== dependencies: "@npmcli/node-gyp" "^2.0.0" "@npmcli/promise-spawn" "^3.0.0" - node-gyp 
"^8.4.1" + node-gyp "^9.0.0" read-package-json-fast "^2.0.3" + which "^2.0.2" -"@octokit/auth-token@^2.4.4": - version "2.5.0" - resolved "https://registry.yarnpkg.com/@octokit/auth-token/-/auth-token-2.5.0.tgz#27c37ea26c205f28443402477ffd261311f21e36" - integrity sha512-r5FVUJCOLl19AxiuZD2VRZ/ORjp/4IN98Of6YJoJOkY75CIBuYfmiNHGrDwXr+aLGG55igl9QrxX3hbiXlLb+g== +"@nrwl/cli@14.5.4": + version "14.5.4" + resolved "https://registry.yarnpkg.com/@nrwl/cli/-/cli-14.5.4.tgz#86ac4fbcd1bf079b67c420376cf696b68fcc1200" + integrity sha512-UYr14hxeYV8p/zt6D6z33hljZJQROJAVxSC+mm72fyVvy88Gt0sQNLfMmOARXur0p/73PSLM0jJ2Sr7Ftsuu+A== + dependencies: + nx "14.5.4" + +"@nrwl/tao@14.5.4": + version "14.5.4" + resolved "https://registry.yarnpkg.com/@nrwl/tao/-/tao-14.5.4.tgz#a67097d424bcbf7073a1944ea1a0209c4f4f859c" + integrity sha512-a2GCuSE8WghjehuU3GVO63KZEnZXXQiqEg137yN/Na+PxwSu68XeaX53SLyzRskTV120YwBBy1YCTNzAZxxsjg== + dependencies: + nx "14.5.4" + +"@octokit/auth-token@^3.0.0": + version "3.0.0" + resolved "https://registry.yarnpkg.com/@octokit/auth-token/-/auth-token-3.0.0.tgz#6f22c5fc56445c496628488ba6810131558fa4a9" + integrity sha512-MDNFUBcJIptB9At7HiV7VCvU3NcL4GnfCQaP8C5lrxWrRPMJBnemYtehaKSOlaM7AYxeRyj9etenu8LVpSpVaQ== dependencies: "@octokit/types" "^6.0.3" -"@octokit/core@^3.5.1": - version "3.6.0" - resolved "https://registry.yarnpkg.com/@octokit/core/-/core-3.6.0.tgz#3376cb9f3008d9b3d110370d90e0a1fcd5fe6085" - integrity sha512-7RKRKuA4xTjMhY+eG3jthb3hlZCsOwg3rztWh75Xc+ShDWOfDDATWbeZpAHBNRpm4Tv9WgBMOy1zEJYXG6NJ7Q== +"@octokit/core@^4.0.0": + version "4.0.4" + resolved "https://registry.yarnpkg.com/@octokit/core/-/core-4.0.4.tgz#335d9b377691e3264ce57a9e5a1f6cda783e5838" + integrity sha512-sUpR/hc4Gc7K34o60bWC7WUH6Q7T6ftZ2dUmepSyJr9PRF76/qqkWjE2SOEzCqLA5W83SaISymwKtxks+96hPQ== dependencies: - "@octokit/auth-token" "^2.4.4" - "@octokit/graphql" "^4.5.8" - "@octokit/request" "^5.6.3" - "@octokit/request-error" "^2.0.5" + "@octokit/auth-token" "^3.0.0" + "@octokit/graphql" "^5.0.0" + "@octokit/request" "^6.0.0" + "@octokit/request-error" "^3.0.0" "@octokit/types" "^6.0.3" before-after-hook "^2.2.0" universal-user-agent "^6.0.0" -"@octokit/endpoint@^6.0.1": - version "6.0.12" - resolved "https://registry.yarnpkg.com/@octokit/endpoint/-/endpoint-6.0.12.tgz#3b4d47a4b0e79b1027fb8d75d4221928b2d05658" - integrity sha512-lF3puPwkQWGfkMClXb4k/eUT/nZKQfxinRWJrdZaJO85Dqwo/G0yOC434Jr2ojwafWJMYqFGFa5ms4jJUgujdA== +"@octokit/endpoint@^7.0.0": + version "7.0.0" + resolved "https://registry.yarnpkg.com/@octokit/endpoint/-/endpoint-7.0.0.tgz#be758a1236d68d6bbb505e686dd50881c327a519" + integrity sha512-Kz/mIkOTjs9rV50hf/JK9pIDl4aGwAtT8pry6Rpy+hVXkAPhXanNQRxMoq6AeRgDCZR6t/A1zKniY2V1YhrzlQ== dependencies: "@octokit/types" "^6.0.3" is-plain-object "^5.0.0" universal-user-agent "^6.0.0" -"@octokit/graphql@^4.5.8": - version "4.8.0" - resolved "https://registry.yarnpkg.com/@octokit/graphql/-/graphql-4.8.0.tgz#664d9b11c0e12112cbf78e10f49a05959aa22cc3" - integrity sha512-0gv+qLSBLKF0z8TKaSKTsS39scVKF9dbMxJpj3U0vC7wjNWFuIpL/z76Qe2fiuCbDRcJSavkXsVtMS6/dtQQsg== +"@octokit/graphql@^5.0.0": + version "5.0.0" + resolved "https://registry.yarnpkg.com/@octokit/graphql/-/graphql-5.0.0.tgz#2cc6eb3bf8e0278656df1a7d0ca0d7591599e3b3" + integrity sha512-1ZZ8tX4lUEcLPvHagfIVu5S2xpHYXAmgN0+95eAOPoaVPzCfUXJtA5vASafcpWcO86ze0Pzn30TAx72aB2aguQ== dependencies: - "@octokit/request" "^5.6.0" + "@octokit/request" "^6.0.0" "@octokit/types" "^6.0.3" universal-user-agent "^6.0.0" -"@octokit/openapi-types@^11.2.0": - version "11.2.0" - resolved 
"https://registry.yarnpkg.com/@octokit/openapi-types/-/openapi-types-11.2.0.tgz#b38d7fc3736d52a1e96b230c1ccd4a58a2f400a6" - integrity sha512-PBsVO+15KSlGmiI8QAzaqvsNlZlrDlyAJYcrXBCvVUxCp7VnXjkwPoFHgjEJXx3WF9BAwkA6nfCUA7i9sODzKA== +"@octokit/openapi-types@^12.11.0": + version "12.11.0" + resolved "https://registry.yarnpkg.com/@octokit/openapi-types/-/openapi-types-12.11.0.tgz#da5638d64f2b919bca89ce6602d059f1b52d3ef0" + integrity sha512-VsXyi8peyRq9PqIz/tpqiL2w3w80OgVMwBHltTml3LmVvXiphgeqmY9mvBw9Wu7e0QWk/fqD37ux8yP5uVekyQ== "@octokit/plugin-enterprise-rest@^6.0.1": version "6.0.1" resolved "https://registry.yarnpkg.com/@octokit/plugin-enterprise-rest/-/plugin-enterprise-rest-6.0.1.tgz#e07896739618dab8da7d4077c658003775f95437" integrity sha512-93uGjlhUD+iNg1iWhUENAtJata6w5nE+V4urXOAlIXdco6xNZtUSfYY8dzp3Udy74aqO/B5UZL80x/YMa5PKRw== -"@octokit/plugin-paginate-rest@^2.16.8": - version "2.17.0" - resolved "https://registry.yarnpkg.com/@octokit/plugin-paginate-rest/-/plugin-paginate-rest-2.17.0.tgz#32e9c7cab2a374421d3d0de239102287d791bce7" - integrity sha512-tzMbrbnam2Mt4AhuyCHvpRkS0oZ5MvwwcQPYGtMv4tUa5kkzG58SVB0fcsLulOZQeRnOgdkZWkRUiyBlh0Bkyw== +"@octokit/plugin-paginate-rest@^3.0.0": + version "3.1.0" + resolved "https://registry.yarnpkg.com/@octokit/plugin-paginate-rest/-/plugin-paginate-rest-3.1.0.tgz#86f8be759ce2d6d7c879a31490fd2f7410b731f0" + integrity sha512-+cfc40pMzWcLkoDcLb1KXqjX0jTGYXjKuQdFQDc6UAknISJHnZTiBqld6HDwRJvD4DsouDKrWXNbNV0lE/3AXA== dependencies: - "@octokit/types" "^6.34.0" + "@octokit/types" "^6.41.0" "@octokit/plugin-request-log@^1.0.4": version "1.0.4" resolved "https://registry.yarnpkg.com/@octokit/plugin-request-log/-/plugin-request-log-1.0.4.tgz#5e50ed7083a613816b1e4a28aeec5fb7f1462e85" integrity sha512-mLUsMkgP7K/cnFEw07kWqXGF5LKrOkD+lhCrKvPHXWDywAwuDUeDwWBpc69XK3pNX0uKiVt8g5z96PJ6z9xCFA== -"@octokit/plugin-rest-endpoint-methods@^5.12.0": - version "5.13.0" - resolved "https://registry.yarnpkg.com/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-5.13.0.tgz#8c46109021a3412233f6f50d28786f8e552427ba" - integrity sha512-uJjMTkN1KaOIgNtUPMtIXDOjx6dGYysdIFhgA52x4xSadQCz3b/zJexvITDVpANnfKPW/+E0xkOvLntqMYpviA== +"@octokit/plugin-rest-endpoint-methods@^6.0.0": + version "6.2.0" + resolved "https://registry.yarnpkg.com/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-6.2.0.tgz#c06359d2f94436f8c67d345093cb02dedd31d974" + integrity sha512-PZ+yfkbZAuRUtqu6Y191/V3eM0KBPx+Yq7nh+ONPdpm3EX4pd5UnK2y2XgO/0AtNum5a4aJCDjqsDuUZ2hWRXw== dependencies: - "@octokit/types" "^6.34.0" + "@octokit/types" "^6.41.0" deprecation "^2.3.1" -"@octokit/request-error@^2.0.5", "@octokit/request-error@^2.1.0": - version "2.1.0" - resolved "https://registry.yarnpkg.com/@octokit/request-error/-/request-error-2.1.0.tgz#9e150357831bfc788d13a4fd4b1913d60c74d677" - integrity sha512-1VIvgXxs9WHSjicsRwq8PlR2LR2x6DwsJAaFgzdi0JfJoGSO8mYI/cHJQ+9FbN21aa+DrgNLnwObmyeSC8Rmpg== +"@octokit/request-error@^3.0.0": + version "3.0.0" + resolved "https://registry.yarnpkg.com/@octokit/request-error/-/request-error-3.0.0.tgz#f527d178f115a3b62d76ce4804dd5bdbc0270a81" + integrity sha512-WBtpzm9lR8z4IHIMtOqr6XwfkGvMOOILNLxsWvDwtzm/n7f5AWuqJTXQXdDtOvPfTDrH4TPhEvW2qMlR4JFA2w== dependencies: "@octokit/types" "^6.0.3" deprecation "^2.0.0" once "^1.4.0" -"@octokit/request@^5.6.0", "@octokit/request@^5.6.3": - version "5.6.3" - resolved "https://registry.yarnpkg.com/@octokit/request/-/request-5.6.3.tgz#19a022515a5bba965ac06c9d1334514eb50c48b0" - integrity 
sha512-bFJl0I1KVc9jYTe9tdGGpAMPy32dLBXXo1dS/YwSCTL/2nd9XeHsY616RE3HPXDVk+a+dBuzyz5YdlXwcDTr2A== +"@octokit/request@^6.0.0": + version "6.2.0" + resolved "https://registry.yarnpkg.com/@octokit/request/-/request-6.2.0.tgz#9c25606df84e6f2ccbcc2c58e1d35438e20b688b" + integrity sha512-7IAmHnaezZrgUqtRShMlByJK33MT9ZDnMRgZjnRrRV9a/jzzFwKGz0vxhFU6i7VMLraYcQ1qmcAOin37Kryq+Q== dependencies: - "@octokit/endpoint" "^6.0.1" - "@octokit/request-error" "^2.1.0" + "@octokit/endpoint" "^7.0.0" + "@octokit/request-error" "^3.0.0" "@octokit/types" "^6.16.1" is-plain-object "^5.0.0" node-fetch "^2.6.7" universal-user-agent "^6.0.0" -"@octokit/rest@^18.1.0": - version "18.12.0" - resolved "https://registry.yarnpkg.com/@octokit/rest/-/rest-18.12.0.tgz#f06bc4952fc87130308d810ca9d00e79f6988881" - integrity sha512-gDPiOHlyGavxr72y0guQEhLsemgVjwRePayJ+FcKc2SJqKUbxbkvf5kAZEWA/MKvsfYlQAMVzNJE3ezQcxMJ2Q== +"@octokit/rest@^19.0.3": + version "19.0.3" + resolved "https://registry.yarnpkg.com/@octokit/rest/-/rest-19.0.3.tgz#b9a4e8dc8d53e030d611c053153ee6045f080f02" + integrity sha512-5arkTsnnRT7/sbI4fqgSJ35KiFaN7zQm0uQiQtivNQLI8RQx8EHwJCajcTUwmaCMNDg7tdCvqAnc7uvHHPxrtQ== dependencies: - "@octokit/core" "^3.5.1" - "@octokit/plugin-paginate-rest" "^2.16.8" + "@octokit/core" "^4.0.0" + "@octokit/plugin-paginate-rest" "^3.0.0" "@octokit/plugin-request-log" "^1.0.4" - "@octokit/plugin-rest-endpoint-methods" "^5.12.0" + "@octokit/plugin-rest-endpoint-methods" "^6.0.0" -"@octokit/types@^6.0.3", "@octokit/types@^6.16.1", "@octokit/types@^6.34.0": - version "6.34.0" - resolved "https://registry.yarnpkg.com/@octokit/types/-/types-6.34.0.tgz#c6021333334d1ecfb5d370a8798162ddf1ae8218" - integrity sha512-s1zLBjWhdEI2zwaoSgyOFoKSl109CUcVBCc7biPJ3aAf6LGLU6szDvi31JPU7bxfla2lqfhjbbg/5DdFNxOwHw== +"@octokit/types@^6.0.3", "@octokit/types@^6.16.1", "@octokit/types@^6.41.0": + version "6.41.0" + resolved "https://registry.yarnpkg.com/@octokit/types/-/types-6.41.0.tgz#e58ef78d78596d2fb7df9c6259802464b5f84a04" + integrity sha512-eJ2jbzjdijiL3B4PrSQaSjuF2sPEQPVCPzBvTHJD9Nz+9dw2SGH4K4xeQJ77YfTq5bRQ+bD8wT11JbeDPmxmGg== dependencies: - "@octokit/openapi-types" "^11.2.0" + "@octokit/openapi-types" "^12.11.0" -"@openpgp/web-stream-tools@0.0.10": - version "0.0.10" - resolved "https://registry.yarnpkg.com/@openpgp/web-stream-tools/-/web-stream-tools-0.0.10.tgz#4496390da9715c9bfc581ad144f9fb8a36a37775" - integrity sha512-1ONZADML0fb0RJR5UiGYPnRf9VaYBYUBc1gF9jyq57sHkr58cp5/BQHS+ivrqbRw21Sb70FKTssmJbRe71V+kw== +"@openpgp/web-stream-tools@0.0.11": + version "0.0.11" + resolved "https://registry.yarnpkg.com/@openpgp/web-stream-tools/-/web-stream-tools-0.0.11.tgz#71bc9c493024506b1e5b0a587b85984435bbff8e" + integrity sha512-52NMPRmlXIVajd5dhpDNsG7WJRCdlcS1wXY03OGH1rxm7p6i3QzJvTVyKEAcW0T9KojvLKakV2uTICceELqSMw== dependencies: "@mattiasbuelens/web-streams-adapter" "~0.1.0" web-streams-polyfill "~3.0.3" +"@parcel/watcher@2.0.4": + version "2.0.4" + resolved "https://registry.yarnpkg.com/@parcel/watcher/-/watcher-2.0.4.tgz#f300fef4cc38008ff4b8c29d92588eced3ce014b" + integrity sha512-cTDi+FUDBIUOBKEtj+nhiJ71AZVlkAsQFuGQTun5tV9mwQBQgZvhCzG+URPQc8myeN32yRVZEfVAPCs1RW+Jvg== + dependencies: + node-addon-api "^3.2.1" + node-gyp-build "^4.3.0" + "@polka/url@^1.0.0-next.20": version "1.0.0-next.21" resolved "https://registry.yarnpkg.com/@polka/url/-/url-1.0.0-next.21.tgz#5de5a2385a35309427f6011992b544514d559aa1" @@ -1677,10 +1697,10 @@ resolved "https://registry.yarnpkg.com/@rollup/stream/-/stream-2.0.0.tgz#2ada818c2d042e37f63119d7bf8bbfc71792f641" integrity 
sha512-HsCyY/phZMys1zFUYoYlnDJGG9zMmYFfEjDKNQa00CYgjeyGD4cLdO6KNIkBh61AWOZfOsTPuGtNmFCsjQOfFg== -"@sinclair/typebox@^0.23.3": - version "0.23.5" - resolved "https://registry.yarnpkg.com/@sinclair/typebox/-/typebox-0.23.5.tgz#93f7b9f4e3285a7a9ade7557d9a8d36809cbc47d" - integrity sha512-AFBVi/iT4g20DHoujvMH1aEDn8fGJh4xsRGCP6d8RpLPMqsNPvW01Jcn0QysXTsg++/xj25NmJsGyH9xug/wKg== +"@sinclair/typebox@^0.24.1": + version "0.24.27" + resolved "https://registry.yarnpkg.com/@sinclair/typebox/-/typebox-0.24.27.tgz#d55643516a1546174e10da681a8aaa81e757452d" + integrity sha512-K7C7IlQ3zLePEZleUN21ceBA2aLcMnLHTLph8QWk1JK37L90obdpY+QGY8bXMKxf1ht1Z0MNewvXxWv0oGDYFg== "@sinonjs/commons@^1.7.0": version "1.8.3" @@ -1689,42 +1709,37 @@ dependencies: type-detect "4.0.8" -"@sinonjs/fake-timers@^9.1.1": +"@sinonjs/fake-timers@^9.1.2": version "9.1.2" resolved "https://registry.yarnpkg.com/@sinonjs/fake-timers/-/fake-timers-9.1.2.tgz#4eaab737fab77332ab132d396a3c0d364bd0ea8c" integrity sha512-BPS4ynJW/o92PUR4wgriz2Ud5gpST5vz6GQfMixEDK0Z8ZCUv2M7SkBLykH56T++Xs+8ln9zTGbOvNGIe02/jw== dependencies: "@sinonjs/commons" "^1.7.0" -"@tootallnate/once@1": - version "1.1.2" - resolved "https://registry.yarnpkg.com/@tootallnate/once/-/once-1.1.2.tgz#ccb91445360179a04e7fe6aff78c00ffc1eeaf82" - integrity sha512-RbzJvlNzmRq5c3O09UipeuXno4tA1FE6ikOjxZK0tuxVv3412l64l5t1W5pj4+rJq9vpkm/kwiR07aZXnsKPxw== - "@tootallnate/once@2": version "2.0.0" resolved "https://registry.yarnpkg.com/@tootallnate/once/-/once-2.0.0.tgz#f544a148d3ab35801c1f633a7441fd87c2e484bf" integrity sha512-XCuKFP5PS55gnMVu3dty8KPatLqUoy/ZYzDzAGCQ8JNFCkLXzmI7vNHCR+XpbZaMWQK/vQubr7PkYq8g470J/A== "@tsconfig/node10@^1.0.7": - version "1.0.8" - resolved "https://registry.yarnpkg.com/@tsconfig/node10/-/node10-1.0.8.tgz#c1e4e80d6f964fbecb3359c43bd48b40f7cadad9" - integrity sha512-6XFfSQmMgq0CFLY1MslA/CPUfhIL919M1rMsa5lP2P097N2Wd1sSX0tx1u4olM16fLNhtHZpRhedZJphNJqmZg== + version "1.0.9" + resolved "https://registry.yarnpkg.com/@tsconfig/node10/-/node10-1.0.9.tgz#df4907fc07a886922637b15e02d4cebc4c0021b2" + integrity sha512-jNsYVVxU8v5g43Erja32laIDHXeoNvFEpX33OK4d6hljo3jDhCBDhx5dhCCTMWUojscpAagGiRkBKxpdl9fxqA== "@tsconfig/node12@^1.0.7": - version "1.0.9" - resolved "https://registry.yarnpkg.com/@tsconfig/node12/-/node12-1.0.9.tgz#62c1f6dee2ebd9aead80dc3afa56810e58e1a04c" - integrity sha512-/yBMcem+fbvhSREH+s14YJi18sp7J9jpuhYByADT2rypfajMZZN4WQ6zBGgBKp53NKmqI36wFYDb3yaMPurITw== + version "1.0.11" + resolved "https://registry.yarnpkg.com/@tsconfig/node12/-/node12-1.0.11.tgz#ee3def1f27d9ed66dac6e46a295cffb0152e058d" + integrity sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag== "@tsconfig/node14@^1.0.0": - version "1.0.1" - resolved "https://registry.yarnpkg.com/@tsconfig/node14/-/node14-1.0.1.tgz#95f2d167ffb9b8d2068b0b235302fafd4df711f2" - integrity sha512-509r2+yARFfHHE7T6Puu2jjkoycftovhXRqW328PDXTVGKihlb1P8Z9mMZH04ebyajfRY7dedfGynlrFHJUQCg== + version "1.0.3" + resolved "https://registry.yarnpkg.com/@tsconfig/node14/-/node14-1.0.3.tgz#e4386316284f00b98435bf40f72f75a09dabf6c1" + integrity sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow== "@tsconfig/node16@^1.0.2": - version "1.0.2" - resolved "https://registry.yarnpkg.com/@tsconfig/node16/-/node16-1.0.2.tgz#423c77877d0569db20e1fc80885ac4118314010e" - integrity sha512-eZxlbI8GZscaGS7kkc/trHTT5xgrjH3/1n2JDwusC9iahPKWMRvRjJSAN5mCXviuTGQ/lHnhvv8Q1YTpnfz9gA== + version "1.0.3" + resolved 
"https://registry.yarnpkg.com/@tsconfig/node16/-/node16-1.0.3.tgz#472eaab5f15c1ffdd7f8628bd4c4f753995ec79e" + integrity sha512-yOlFc+7UtL/89t2ZhjPvvB/DeAr3r+Dq58IgzsFkOAvVC6NMJXmCGjbptdXdR9qsX7pKcTL+s87FtYREi2dEEQ== "@types/babel__core@^7.1.14": version "7.1.19" @@ -1753,9 +1768,9 @@ "@babel/types" "^7.0.0" "@types/babel__traverse@*", "@types/babel__traverse@^7.0.6": - version "7.17.1" - resolved "https://registry.yarnpkg.com/@types/babel__traverse/-/babel__traverse-7.17.1.tgz#1a0e73e8c28c7e832656db372b779bfd2ef37314" - integrity sha512-kVzjari1s2YVi77D3w1yuvohV2idweYXMCDzqBiVNN63TcDWrIlTVOYpqVrvbbyOE/IyzBoTKF0fdnLPEORFxA== + version "7.18.0" + resolved "https://registry.yarnpkg.com/@types/babel__traverse/-/babel__traverse-7.18.0.tgz#8134fd78cb39567465be65b9fdc16d378095f41f" + integrity sha512-v4Vwdko+pgymgS+A2UIaJru93zQd85vIGWObM5ekZNdXCKtDYqATlEYnWgfo86Q6I1Lh0oXnksDnMU1cwmlPDw== dependencies: "@babel/types" "^7.3.0" @@ -1775,31 +1790,36 @@ integrity sha512-n7RlEEJ+4x4TS7ZQddTmNSxP+zziEG0TNsMfiRIxcIVXt71ENJ9ojeXmGO3wPoTdn7pJcU2xc3CJYMktNT6DPg== "@types/eslint-scope@^3.7.3": - version "3.7.3" - resolved "https://registry.yarnpkg.com/@types/eslint-scope/-/eslint-scope-3.7.3.tgz#125b88504b61e3c8bc6f870882003253005c3224" - integrity sha512-PB3ldyrcnAicT35TWPs5IcwKD8S333HMaa2VVv4+wdvebJkjWuW/xESoB8IwRcog8HYVYamb1g/R31Qv5Bx03g== + version "3.7.4" + resolved "https://registry.yarnpkg.com/@types/eslint-scope/-/eslint-scope-3.7.4.tgz#37fc1223f0786c39627068a12e94d6e6fc61de16" + integrity sha512-9K4zoImiZc3HlIp6AVUDE4CWYx22a+lhSZMYNpbjW04+YF0KWj4pJXnEMjdnFTiQibFFmElcsasJXDbdI/EPhA== dependencies: "@types/eslint" "*" "@types/estree" "*" "@types/eslint@*": - version "8.4.2" - resolved "https://registry.yarnpkg.com/@types/eslint/-/eslint-8.4.2.tgz#48f2ac58ab9c631cb68845c3d956b28f79fad575" - integrity sha512-Z1nseZON+GEnFjJc04sv4NSALGjhFwy6K0HXt7qsn5ArfAKtb63dXNJHf+1YW6IpOIYRBGUbu3GwJdj8DGnCjA== + version "8.4.5" + resolved "https://registry.yarnpkg.com/@types/eslint/-/eslint-8.4.5.tgz#acdfb7dd36b91cc5d812d7c093811a8f3d9b31e4" + integrity sha512-dhsC09y1gpJWnK+Ff4SGvCuSnk9DaU0BJZSzOwa6GVSg65XtTugLBITDAAzRU5duGBoXBHpdR/9jHGxJjNflJQ== dependencies: "@types/estree" "*" "@types/json-schema" "*" -"@types/estree@*", "@types/estree@^0.0.51": - version "0.0.51" - resolved "https://registry.yarnpkg.com/@types/estree/-/estree-0.0.51.tgz#cfd70924a25a3fd32b218e5e420e6897e1ac4f40" - integrity sha512-CuPgU6f3eT/XgKKPqKd/gLZV1Xmvf1a2R5POBOGQa6uv82xpls89HU5zKeVoyR8XzHd1RGNOlQlvUe3CFkjWNQ== +"@types/estree@*": + version "1.0.0" + resolved "https://registry.yarnpkg.com/@types/estree/-/estree-1.0.0.tgz#5fb2e536c1ae9bf35366eed879e827fa59ca41c2" + integrity sha512-WulqXMDUTYAXCjZnk6JtIHPigp55cVtDgDrO2gHRwhyJto21+1zbVCtOYB2L1F9w4qCQ0rOGWBnBe0FNTiEJIQ== "@types/estree@0.0.39": version "0.0.39" resolved "https://registry.yarnpkg.com/@types/estree/-/estree-0.0.39.tgz#e177e699ee1b8c22d23174caaa7422644389509f" integrity sha512-EYNwp3bU+98cpU4lAWYYL7Zz+2gryWH1qbdDTidVd6hkiR6weksdbMadyXKXNPEkQFhXM+hVO9ZygomHXp+AIw== +"@types/estree@^0.0.51": + version "0.0.51" + resolved "https://registry.yarnpkg.com/@types/estree/-/estree-0.0.51.tgz#cfd70924a25a3fd32b218e5e420e6897e1ac4f40" + integrity sha512-CuPgU6f3eT/XgKKPqKd/gLZV1Xmvf1a2R5POBOGQa6uv82xpls89HU5zKeVoyR8XzHd1RGNOlQlvUe3CFkjWNQ== + "@types/expect@^1.20.4": version "1.20.4" resolved "https://registry.yarnpkg.com/@types/expect/-/expect-1.20.4.tgz#8288e51737bf7e3ab5d7c77bfa695883745264e5" @@ -1839,19 +1859,24 @@ dependencies: "@types/istanbul-lib-report" "*" -"@types/jest@27.5.1": - 
version "27.5.1" - resolved "https://registry.yarnpkg.com/@types/jest/-/jest-27.5.1.tgz#2c8b6dc6ff85c33bcd07d0b62cb3d19ddfdb3ab9" - integrity sha512-fUy7YRpT+rHXto1YlL+J9rs0uLGyiqVt3ZOTQR+4ROc47yNl8WLdVLgUloBRhOxP1PZvguHl44T3H0wAWxahYQ== +"@types/jest@28.1.6": + version "28.1.6" + resolved "https://registry.yarnpkg.com/@types/jest/-/jest-28.1.6.tgz#d6a9cdd38967d2d746861fb5be6b120e38284dd4" + integrity sha512-0RbGAFMfcBJKOmqRazM8L98uokwuwD5F8rHrv/ZMbrZBwVOWZUyPG6VFNscjYr/vjM3Vu4fRrCPbOs42AfemaQ== dependencies: - jest-matcher-utils "^27.0.0" - pretty-format "^27.0.0" + jest-matcher-utils "^28.0.0" + pretty-format "^28.0.0" "@types/json-schema@*", "@types/json-schema@^7.0.8", "@types/json-schema@^7.0.9": version "7.0.11" resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.11.tgz#d421b6c527a3037f7c84433fd2c4229e016863d3" integrity sha512-wOuvG1SN4Us4rez+tylwwwCV1psiNVOkJeM3AUWUNWg/jDQY2+HE/444y5gc+jBmRqASOm2Oeh5c1axHobwRKQ== +"@types/json5@^0.0.29": + version "0.0.29" + resolved "https://registry.yarnpkg.com/@types/json5/-/json5-0.0.29.tgz#ee28707ae94e11d2b827bcbe5270bcea7f3e71ee" + integrity sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ== + "@types/minimatch@*", "@types/minimatch@^3.0.3": version "3.0.5" resolved "https://registry.yarnpkg.com/@types/minimatch/-/minimatch-3.0.5.tgz#1001cc5e6a3704b83c236027e77f2f58ea010f40" @@ -1862,10 +1887,10 @@ resolved "https://registry.yarnpkg.com/@types/minimist/-/minimist-1.2.2.tgz#ee771e2ba4b3dc5b372935d549fd9617bf345b8c" integrity sha512-jhuKLIRrhvCPLqwPcx6INqmKeiA5EWrsCOPhrlFSrbrmU4ZMPjj5Ul/oLCMDO98XRUIwVm78xICz4EPCektzeQ== -"@types/node@*", "@types/node@^17.0.36": - version "17.0.36" - resolved "https://registry.yarnpkg.com/@types/node/-/node-17.0.36.tgz#c0d5f2fe76b47b63e0e0efc3d2049a9970d68794" - integrity sha512-V3orv+ggDsWVHP99K3JlwtH20R7J4IhI1Kksgc+64q5VxgfRkQG8Ws3MFm/FZOKDYGy9feGFlZ70/HpCNe9QaA== +"@types/node@*", "@types/node@^18.6.4": + version "18.6.4" + resolved "https://registry.yarnpkg.com/@types/node/-/node-18.6.4.tgz#fd26723a8a3f8f46729812a7f9b4fc2d1608ed39" + integrity sha512-I4BD3L+6AWiUobfxZ49DlU43gtI+FTHSv9pE2Zekg6KjMpre4ByusaljW3vYSLJrvQ1ck1hUaeVu8HVlY3vzHg== "@types/node@^13.7.4": version "13.13.52" @@ -1873,9 +1898,9 @@ integrity sha512-s3nugnZumCC//n4moGGe6tkNMyYEdaDBitVjwPxXmR5lnMG5dHePinH2EdxkG3Rh1ghFHHixAG4NJhpJW1rthQ== "@types/node@^14.14.41": - version "14.18.18" - resolved "https://registry.yarnpkg.com/@types/node/-/node-14.18.18.tgz#5c9503030df484ccffcbb935ea9a9e1d6fad1a20" - integrity sha512-B9EoJFjhqcQ9OmQrNorItO+OwEOORNn3S31WuiHvZY/dm9ajkB7AKD/8toessEtHHNL+58jofbq7hMMY9v4yig== + version "14.18.23" + resolved "https://registry.yarnpkg.com/@types/node/-/node-14.18.23.tgz#70f5f20b0b1b38f696848c1d3647bb95694e615e" + integrity sha512-MhbCWN18R4GhO8ewQWAFK4TGQdBpXWByukz7cWyJmXhvRuCIaM/oWytGPqVmDzgEnnaIc9ss6HbU5mUi+vyZPA== "@types/normalize-package-data@^2.4.0": version "2.4.1" @@ -1893,9 +1918,9 @@ integrity sha512-//oorEZjL6sbPcKUaCdIGlIUeH26mgzimjBB77G6XRgnDl/L5wOnpyBGRe/Mmf5CVW3PwEBE1NjiMZ/ssFh4wA== "@types/prettier@^2.1.5": - version "2.6.3" - resolved "https://registry.yarnpkg.com/@types/prettier/-/prettier-2.6.3.tgz#68ada76827b0010d0db071f739314fa429943d0a" - integrity sha512-ymZk3LEC/fsut+/Q5qejp6R9O1rMxz3XaRHDV6kX8MrGAhOSPqVARbDi+EZvInBpw+BnCX3TD240byVkOfQsHg== + version "2.7.0" + resolved "https://registry.yarnpkg.com/@types/prettier/-/prettier-2.7.0.tgz#ea03e9f0376a4446f44797ca19d9c46c36e352dc" + integrity 
sha512-RI1L7N4JnW5gQw2spvL7Sllfuf1SaHdrZpCHiBlCXjIlufi1SMNnbu2teze3/QE67Fg2tBlH7W+mi4hVNk4p0A== "@types/randomatic@3.1.3": version "3.1.3" @@ -1935,20 +1960,20 @@ "@types/yargs-parser" "*" "@types/yargs@^17.0.8": - version "17.0.10" - resolved "https://registry.yarnpkg.com/@types/yargs/-/yargs-17.0.10.tgz#591522fce85d8739bca7b8bb90d048e4478d186a" - integrity sha512-gmEaFwpj/7f/ROdtIlci1R1VYU1J4j95m8T+Tj3iBgiBFKg1foE/PSl93bBd5T9LDXNPo8UlNN6W0qwD8O5OaA== + version "17.0.11" + resolved "https://registry.yarnpkg.com/@types/yargs/-/yargs-17.0.11.tgz#5e10ca33e219807c0eee0f08b5efcba9b6a42c06" + integrity sha512-aB4y9UDUXTSMxmM4MH+YnuR0g5Cph3FLQBoWoMB21DSvFVAxRVEHEMx3TLh+zUZYMCQtKiqazz0Q4Rre31f/OA== dependencies: "@types/yargs-parser" "*" -"@typescript-eslint/eslint-plugin@5.27.0": - version "5.27.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-5.27.0.tgz#23d82a4f21aaafd8f69dbab7e716323bb6695cc8" - integrity sha512-DDrIA7GXtmHXr1VCcx9HivA39eprYBIFxbQEHI6NyraRDxCGpxAFiYQAT/1Y0vh1C+o2vfBiy4IuPoXxtTZCAQ== +"@typescript-eslint/eslint-plugin@5.32.0": + version "5.32.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-5.32.0.tgz#e27e38cffa4a61226327c874a7be965e9a861624" + integrity sha512-CHLuz5Uz7bHP2WgVlvoZGhf0BvFakBJKAD/43Ty0emn4wXWv5k01ND0C0fHcl/Im8Td2y/7h44E9pca9qAu2ew== dependencies: - "@typescript-eslint/scope-manager" "5.27.0" - "@typescript-eslint/type-utils" "5.27.0" - "@typescript-eslint/utils" "5.27.0" + "@typescript-eslint/scope-manager" "5.32.0" + "@typescript-eslint/type-utils" "5.32.0" + "@typescript-eslint/utils" "5.32.0" debug "^4.3.4" functional-red-black-tree "^1.0.1" ignore "^5.2.0" @@ -1956,69 +1981,69 @@ semver "^7.3.7" tsutils "^3.21.0" -"@typescript-eslint/parser@5.27.0": - version "5.27.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-5.27.0.tgz#62bb091ed5cf9c7e126e80021bb563dcf36b6b12" - integrity sha512-8oGjQF46c52l7fMiPPvX4It3u3V3JipssqDfHQ2hcR0AeR8Zge+OYyKUCm5b70X72N1qXt0qgHenwN6Gc2SXZA== +"@typescript-eslint/parser@5.32.0": + version "5.32.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-5.32.0.tgz#1de243443bc6186fb153b9e395b842e46877ca5d" + integrity sha512-IxRtsehdGV9GFQ35IGm5oKKR2OGcazUoiNBxhRV160iF9FoyuXxjY+rIqs1gfnd+4eL98OjeGnMpE7RF/NBb3A== dependencies: - "@typescript-eslint/scope-manager" "5.27.0" - "@typescript-eslint/types" "5.27.0" - "@typescript-eslint/typescript-estree" "5.27.0" + "@typescript-eslint/scope-manager" "5.32.0" + "@typescript-eslint/types" "5.32.0" + "@typescript-eslint/typescript-estree" "5.32.0" debug "^4.3.4" -"@typescript-eslint/scope-manager@5.27.0": - version "5.27.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-5.27.0.tgz#a272178f613050ed62f51f69aae1e19e870a8bbb" - integrity sha512-VnykheBQ/sHd1Vt0LJ1JLrMH1GzHO+SzX6VTXuStISIsvRiurue/eRkTqSrG0CexHQgKG8shyJfR4o5VYioB9g== +"@typescript-eslint/scope-manager@5.32.0": + version "5.32.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-5.32.0.tgz#763386e963a8def470580cc36cf9228864190b95" + integrity sha512-KyAE+tUON0D7tNz92p1uetRqVJiiAkeluvwvZOqBmW9z2XApmk5WSMV9FrzOroAcVxJZB3GfUwVKr98Dr/OjOg== dependencies: - "@typescript-eslint/types" "5.27.0" - "@typescript-eslint/visitor-keys" "5.27.0" + "@typescript-eslint/types" "5.32.0" + "@typescript-eslint/visitor-keys" "5.32.0" -"@typescript-eslint/type-utils@5.27.0": - version "5.27.0" - resolved 
"https://registry.yarnpkg.com/@typescript-eslint/type-utils/-/type-utils-5.27.0.tgz#36fd95f6747412251d79c795b586ba766cf0974b" - integrity sha512-vpTvRRchaf628Hb/Xzfek+85o//zEUotr1SmexKvTfs7czXfYjXVT/a5yDbpzLBX1rhbqxjDdr1Gyo0x1Fc64g== +"@typescript-eslint/type-utils@5.32.0": + version "5.32.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/type-utils/-/type-utils-5.32.0.tgz#45a14506fe3fb908600b4cef2f70778f7b5cdc79" + integrity sha512-0gSsIhFDduBz3QcHJIp3qRCvVYbqzHg8D6bHFsDMrm0rURYDj+skBK2zmYebdCp+4nrd9VWd13egvhYFJj/wZg== dependencies: - "@typescript-eslint/utils" "5.27.0" + "@typescript-eslint/utils" "5.32.0" debug "^4.3.4" tsutils "^3.21.0" -"@typescript-eslint/types@5.27.0": - version "5.27.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-5.27.0.tgz#c3f44b9dda6177a9554f94a74745ca495ba9c001" - integrity sha512-lY6C7oGm9a/GWhmUDOs3xAVRz4ty/XKlQ2fOLr8GAIryGn0+UBOoJDWyHer3UgrHkenorwvBnphhP+zPmzmw0A== +"@typescript-eslint/types@5.32.0": + version "5.32.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-5.32.0.tgz#484273021eeeae87ddb288f39586ef5efeb6dcd8" + integrity sha512-EBUKs68DOcT/EjGfzywp+f8wG9Zw6gj6BjWu7KV/IYllqKJFPlZlLSYw/PTvVyiRw50t6wVbgv4p9uE2h6sZrQ== -"@typescript-eslint/typescript-estree@5.27.0": - version "5.27.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-5.27.0.tgz#7965f5b553c634c5354a47dcce0b40b94611e995" - integrity sha512-QywPMFvgZ+MHSLRofLI7BDL+UczFFHyj0vF5ibeChDAJgdTV8k4xgEwF0geFhVlPc1p8r70eYewzpo6ps+9LJQ== +"@typescript-eslint/typescript-estree@5.32.0": + version "5.32.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-5.32.0.tgz#282943f34babf07a4afa7b0ff347a8e7b6030d12" + integrity sha512-ZVAUkvPk3ITGtCLU5J4atCw9RTxK+SRc6hXqLtllC2sGSeMFWN+YwbiJR9CFrSFJ3w4SJfcWtDwNb/DmUIHdhg== dependencies: - "@typescript-eslint/types" "5.27.0" - "@typescript-eslint/visitor-keys" "5.27.0" + "@typescript-eslint/types" "5.32.0" + "@typescript-eslint/visitor-keys" "5.32.0" debug "^4.3.4" globby "^11.1.0" is-glob "^4.0.3" semver "^7.3.7" tsutils "^3.21.0" -"@typescript-eslint/utils@5.27.0", "@typescript-eslint/utils@^5.10.0": - version "5.27.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-5.27.0.tgz#d0021cbf686467a6a9499bd0589e19665f9f7e71" - integrity sha512-nZvCrkIJppym7cIbP3pOwIkAefXOmfGPnCM0LQfzNaKxJHI6VjI8NC662uoiPlaf5f6ymkTy9C3NQXev2mdXmA== +"@typescript-eslint/utils@5.32.0", "@typescript-eslint/utils@^5.10.0": + version "5.32.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-5.32.0.tgz#eccb6b672b94516f1afc6508d05173c45924840c" + integrity sha512-W7lYIAI5Zlc5K082dGR27Fczjb3Q57ECcXefKU/f0ajM5ToM0P+N9NmJWip8GmGu/g6QISNT+K6KYB+iSHjXCQ== dependencies: "@types/json-schema" "^7.0.9" - "@typescript-eslint/scope-manager" "5.27.0" - "@typescript-eslint/types" "5.27.0" - "@typescript-eslint/typescript-estree" "5.27.0" + "@typescript-eslint/scope-manager" "5.32.0" + "@typescript-eslint/types" "5.32.0" + "@typescript-eslint/typescript-estree" "5.32.0" eslint-scope "^5.1.1" eslint-utils "^3.0.0" -"@typescript-eslint/visitor-keys@5.27.0": - version "5.27.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-5.27.0.tgz#97aa9a5d2f3df8215e6d3b77f9d214a24db269bd" - integrity sha512-46cYrteA2MrIAjv9ai44OQDUoCZyHeGIc4lsjCUX2WT6r4C+kidz1bNiR4017wHOPUythYeH+Sc7/cFP97KEAA== +"@typescript-eslint/visitor-keys@5.32.0": + version "5.32.0" + resolved 
"https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-5.32.0.tgz#b9715d0b11fdb5dd10fd0c42ff13987470525394" + integrity sha512-S54xOHZgfThiZ38/ZGTgB2rqx51CMJ5MCfVT2IplK4Q7hgzGfe0nLzLCcenDnc/cSjP568hdeKfeDcBgqNHD/g== dependencies: - "@typescript-eslint/types" "5.27.0" + "@typescript-eslint/types" "5.32.0" eslint-visitor-keys "^3.3.0" "@webassemblyjs/ast@1.11.1": @@ -2185,10 +2210,10 @@ acorn@^6.4.1: resolved "https://registry.yarnpkg.com/acorn/-/acorn-6.4.2.tgz#35866fd710528e92de10cf06016498e47e39e1e6" integrity sha512-XtGIhXwF8YM8bJhGxG5kXgjkEuNGLTkoYqVE+KMR+aspr4KGYmKYg7yUe3KghyQ9yheNwLnjmzh/7+gfDBmHCQ== -acorn@^8.0.4, acorn@^8.4.1, acorn@^8.5.0, acorn@^8.7.1: - version "8.7.1" - resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.7.1.tgz#0197122c843d1bf6d0a5e83220a788f278f63c30" - integrity sha512-Xx54uLJQZ19lKygFXOWsscKUbsBZW0CPykPhVQdhIeIwrbPmJzqeASDInc8nKBnp/JT6igTs82qPXz069H8I/A== +acorn@^8.0.4, acorn@^8.4.1, acorn@^8.5.0, acorn@^8.7.1, acorn@^8.8.0: + version "8.8.0" + resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.8.0.tgz#88c0187620435c7f6015803f5539dae05a9dbea8" + integrity sha512-QOxyigPVrpZ2GXT+PFyZTl6TtOFc5egxHIP9IlQ+RbupQuX4RkT/Bee4/kQuC02Xkzg84JcT7oLYtDIQxp+v7w== add-stream@^1.0.0: version "1.0.0" @@ -2202,7 +2227,7 @@ agent-base@6, agent-base@^6.0.2: dependencies: debug "4" -agentkeepalive@^4.1.3, agentkeepalive@^4.2.1: +agentkeepalive@^4.2.1: version "4.2.1" resolved "https://registry.yarnpkg.com/agentkeepalive/-/agentkeepalive-4.2.1.tgz#a7975cbb9f83b367f06c90cc51ff28fe7d499717" integrity sha512-Zn4cw2NEqd+9fiSVWMscnjyQ1a8Yfoc5oBajLeo5w+YBHgDUcEBY2hS4YpTz6iN5f/2zQiktcuM6tS8x1p9dpA== @@ -2219,6 +2244,14 @@ aggregate-error@^3.0.0: clean-stack "^2.0.0" indent-string "^4.0.0" +aggregate-error@^4.0.0: + version "4.0.1" + resolved "https://registry.yarnpkg.com/aggregate-error/-/aggregate-error-4.0.1.tgz#25091fe1573b9e0be892aeda15c7c66a545f758e" + integrity sha512-0poP0T7el6Vq3rstR8Mn4V/IQrpBLO6POkUSrN7RhyY+GF/InCFShQzsQ39T25gkHhLgSLByyAz+Kjb+c2L98w== + dependencies: + clean-stack "^4.0.0" + indent-string "^5.0.0" + ajv-keywords@^3.5.2: version "3.5.2" resolved "https://registry.yarnpkg.com/ajv-keywords/-/ajv-keywords-3.5.2.tgz#31f29da5ab6e00d1c2d329acf7b5929614d5014d" @@ -2246,6 +2279,11 @@ ansi-colors@^3.0.5: resolved "https://registry.yarnpkg.com/ansi-colors/-/ansi-colors-3.2.4.tgz#e3a3da4bfbae6c86a9c285625de124a234026fbf" integrity sha512-hHUXGagefjN2iRrID63xckIvotOXOojhQKWIPUZ4mNUZ9nLZW+7FMNoE1lOkEhNWYsx/7ysGIuJYCiMAA9FnrA== +ansi-colors@^4.1.1: + version "4.1.3" + resolved "https://registry.yarnpkg.com/ansi-colors/-/ansi-colors-4.1.3.tgz#37611340eb2243e70cc604cad35d63270d48781b" + integrity sha512-/6w/C21Pm1A7aZitlI5Ni/2J6FFQN8i1Cvz3kHABAAbw93v/NlvKdVOqz7CCWz/3iv/JplRSEEZ83XION15ovw== + ansi-escapes@^4.2.1, ansi-escapes@^4.3.0: version "4.3.2" resolved "https://registry.yarnpkg.com/ansi-escapes/-/ansi-escapes-4.3.2.tgz#6b2291d1db7d98b6521d5f1efa42d0f3a9feb65e" @@ -2302,7 +2340,7 @@ anymatch@^2.0.0: micromatch "^3.1.4" normalize-path "^2.1.1" -anymatch@^3.0.3: +anymatch@^3.0.3, anymatch@~3.1.2: version "3.1.2" resolved "https://registry.yarnpkg.com/anymatch/-/anymatch-3.1.2.tgz#c0557c096af32f106198f4f4e2a383537e378716" integrity sha512-P43ePfOAIupkguHUycrc4qJ9kz8ZiuOUijaETwX7THt0Y/GNK7v0aa8rY816xWjZ7rJdA5XdMcpVFTKMq+RvWg== @@ -2317,11 +2355,6 @@ append-buffer@^1.0.2: dependencies: buffer-equal "^1.0.0" -aproba@^1.0.3: - version "1.2.0" - resolved "https://registry.yarnpkg.com/aproba/-/aproba-1.2.0.tgz#6802e6264efd18c790a1b0d517f0f2627bf2c94a" 
- integrity sha512-Y9J6ZjXtoYh8RnXVCMOU/ttDmk1aBjunq9vO0ta5x85WDQiQfUF9sIPBITdbiiIVcBo03Hi3jMxigBtsddlXRw== - "aproba@^1.0.3 || ^2.0.0", aproba@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/aproba/-/aproba-2.0.0.tgz#52520b8ae5b569215b354efc0caa3fe1e45a8adc" @@ -2333,21 +2366,13 @@ archy@^1.0.0: integrity sha512-Xg+9RwCg/0p32teKdGMPTPnVXKD0w3DfHnFTficozsAgsvq2XenPJq/MYpzzQ/v8zrOyJn6Ds39VA4JIDwFfqw== are-we-there-yet@^3.0.0: - version "3.0.0" - resolved "https://registry.yarnpkg.com/are-we-there-yet/-/are-we-there-yet-3.0.0.tgz#ba20bd6b553e31d62fc8c31bd23d22b95734390d" - integrity sha512-0GWpv50YSOcLXaN6/FAKY3vfRbllXWV2xvfA/oKJF8pzFhWXPV+yjhJXDBbjscDYowv7Yw1A3uigpzn5iEGTyw== + version "3.0.1" + resolved "https://registry.yarnpkg.com/are-we-there-yet/-/are-we-there-yet-3.0.1.tgz#679df222b278c64f2cdba1175cdc00b0d96164bd" + integrity sha512-QZW4EDmGwlYur0Yyf/b2uGucHQMa8aFUP7eu9ddR73vvhFyt4V0Vl3QHPcTNJ8l6qYOBdxgXdnBXQrHilfRQBg== dependencies: delegates "^1.0.0" readable-stream "^3.6.0" -are-we-there-yet@~1.1.2: - version "1.1.7" - resolved "https://registry.yarnpkg.com/are-we-there-yet/-/are-we-there-yet-1.1.7.tgz#b15474a932adab4ff8a50d9adfa7e4e926f21146" - integrity sha512-nxwy40TuMiUGqMyRHgCSWZ9FM4VAoRP4xUYSTv5ImRog+h9yISPbVH7H8fASCIzYn9wlEv4zvFL7uKDMCFQm3g== - dependencies: - delegates "^1.0.0" - readable-stream "^2.0.6" - arg@^4.1.0: version "4.1.3" resolved "https://registry.yarnpkg.com/arg/-/arg-4.1.3.tgz#269fc7ad5b8e42cb63c896d5666017261c144089" @@ -2483,7 +2508,16 @@ astral-regex@^2.0.0: resolved "https://registry.yarnpkg.com/astral-regex/-/astral-regex-2.0.0.tgz#483143c567aeed4785759c0865786dc77d7d2e31" integrity sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ== -async-done@1.3.2, async-done@^1.2.0, async-done@^1.2.2: +async-done@2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/async-done/-/async-done-2.0.0.tgz#f1ec5df738c6383a52b0a30d0902fd897329c15a" + integrity sha512-j0s3bzYq9yKIVLKGE/tWlCpa3PfFLcrDZLTSVdnnCTGagXuXBJO4SsY9Xdk/fQBirCkH4evW5xOeJXqlAQFdsw== + dependencies: + end-of-stream "^1.4.4" + once "^1.4.0" + stream-exhaust "^1.0.2" + +async-done@^1.2.0, async-done@^1.2.2: version "1.3.2" resolved "https://registry.yarnpkg.com/async-done/-/async-done-1.3.2.tgz#5e15aa729962a4b07414f528a88cdf18e0b290a2" integrity sha512-uYkTP8dw2og1tu1nmza1n1CMW0qb8gWWlwqMmLb7MhBVs4BXrFziT6HXUd+/RlRA/i4H9AkofYloUbs1fwMqlw== @@ -2515,15 +2549,15 @@ atob@^2.1.2: resolved "https://registry.yarnpkg.com/atob/-/atob-2.1.2.tgz#6d9517eb9e030d2436666651e86bd9f6f13533c9" integrity sha512-Wm6ukoaOGJi/73p/cl2GvLjTI5JM1k/O14isD73YML8StrH/7/lRFgmg8nICZgD3bZZvjwCGxtMOD3wWNAu8cg== -babel-jest@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/babel-jest/-/babel-jest-28.1.0.tgz#95a67f8e2e7c0042e7b3ad3951b8af41a533b5ea" - integrity sha512-zNKk0yhDZ6QUwfxh9k07GII6siNGMJWVUU49gmFj5gfdqDKLqa2RArXOF2CODp4Dr7dLxN2cvAV+667dGJ4b4w== +babel-jest@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/babel-jest/-/babel-jest-28.1.3.tgz#c1187258197c099072156a0a121c11ee1e3917d5" + integrity sha512-epUaPOEWMk3cWX0M/sPvCHHCe9fMFAa/9hXEgKP8nFfNl/jlGkE9ucq9NqkZGXLDduCJYS0UvSlPUwC0S+rH6Q== dependencies: - "@jest/transform" "^28.1.0" + "@jest/transform" "^28.1.3" "@types/babel__core" "^7.1.14" babel-plugin-istanbul "^6.1.1" - babel-preset-jest "^28.0.2" + babel-preset-jest "^28.1.3" chalk "^4.0.0" graceful-fs "^4.2.9" slash "^3.0.0" @@ -2539,10 +2573,10 @@ babel-plugin-istanbul@^6.1.1: istanbul-lib-instrument "^5.0.4" 
test-exclude "^6.0.0" -babel-plugin-jest-hoist@^28.0.2: - version "28.0.2" - resolved "https://registry.yarnpkg.com/babel-plugin-jest-hoist/-/babel-plugin-jest-hoist-28.0.2.tgz#9307d03a633be6fc4b1a6bc5c3a87e22bd01dd3b" - integrity sha512-Kizhn/ZL+68ZQHxSnHyuvJv8IchXD62KQxV77TBDV/xoBFBOfgRAk97GNs6hXdTTCiVES9nB2I6+7MXXrk5llQ== +babel-plugin-jest-hoist@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/babel-plugin-jest-hoist/-/babel-plugin-jest-hoist-28.1.3.tgz#1952c4d0ea50f2d6d794353762278d1d8cca3fbe" + integrity sha512-Ys3tUKAmfnkRUpPdpa98eYrAR0nV+sSFUZZEGuQ2EbFd1y4SOLtD5QDNHAq+bb9a+bbXvYQC4b+ID/THIMcU6Q== dependencies: "@babel/template" "^7.3.3" "@babel/types" "^7.3.3" @@ -2567,12 +2601,12 @@ babel-preset-current-node-syntax@^1.0.0: "@babel/plugin-syntax-optional-chaining" "^7.8.3" "@babel/plugin-syntax-top-level-await" "^7.8.3" -babel-preset-jest@^28.0.2: - version "28.0.2" - resolved "https://registry.yarnpkg.com/babel-preset-jest/-/babel-preset-jest-28.0.2.tgz#d8210fe4e46c1017e9fa13d7794b166e93aa9f89" - integrity sha512-sYzXIdgIXXroJTFeB3S6sNDWtlJ2dllCdTEsnZ65ACrMojj3hVNFRmnJ1HZtomGi+Be7aqpY/HJ92fr8OhKVkQ== +babel-preset-jest@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/babel-preset-jest/-/babel-preset-jest-28.1.3.tgz#5dfc20b99abed5db994406c2b9ab94c73aaa419d" + integrity sha512-L+fupJvlWAHbQfn74coNX3zf60LXMJsezNvvx8eIh7iOR1luJ1poxYgQk1F8PYtNq/6QODDHCqsSnTFSWC491A== dependencies: - babel-plugin-jest-hoist "^28.0.2" + babel-plugin-jest-hoist "^28.1.3" babel-preset-current-node-syntax "^1.0.0" bach@^1.0.0: @@ -2595,6 +2629,11 @@ balanced-match@^1.0.0: resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.2.tgz#e83e3a7e3f300b34cb9d87f615fa0cbf357690ee" integrity sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw== +base64-js@^1.3.1: + version "1.5.1" + resolved "https://registry.yarnpkg.com/base64-js/-/base64-js-1.5.1.tgz#1b1b440160a5bf7ad40b650f095963481903930a" + integrity sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA== + base@^0.11.1: version "0.11.2" resolved "https://registry.yarnpkg.com/base/-/base-0.11.2.tgz#7bde5ced145b6d551a90db87f83c558b4eb48a8f" @@ -2653,6 +2692,11 @@ binary-extensions@^1.0.0: resolved "https://registry.yarnpkg.com/binary-extensions/-/binary-extensions-1.13.1.tgz#598afe54755b2868a5330d2aff9d4ebb53209b65" integrity sha512-Un7MIEDdUC5gNpcGDV97op1Ywk748MpHcFTHoYs6qnj1Z3j7I53VG3nwZhKzoBZmbdRNnb6WRdFlwl7tSDuZGw== +binary-extensions@^2.0.0: + version "2.2.0" + resolved "https://registry.yarnpkg.com/binary-extensions/-/binary-extensions-2.2.0.tgz#75f502eeaf9ffde42fc98829645be4ea76bd9e2d" + integrity sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA== + binaryextensions@^2.2.0: version "2.3.0" resolved "https://registry.yarnpkg.com/binaryextensions/-/binaryextensions-2.3.0.tgz#1d269cbf7e6243ea886aa41453c3651ccbe13c22" @@ -2673,6 +2717,15 @@ bl@^1.2.1: readable-stream "^2.3.5" safe-buffer "^5.1.1" +bl@^4.0.3, bl@^4.1.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/bl/-/bl-4.1.0.tgz#451535264182bec2fbbc83a62ab98cf11d9f7b3a" + integrity sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w== + dependencies: + buffer "^5.5.0" + inherits "^2.0.4" + readable-stream "^3.4.0" + brace-expansion@^1.1.7: version "1.1.11" resolved 
"https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd" @@ -2704,7 +2757,7 @@ braces@^2.3.1, braces@^2.3.2: split-string "^3.0.2" to-regex "^3.0.1" -braces@^3.0.2: +braces@^3.0.2, braces@~3.0.2: version "3.0.2" resolved "https://registry.yarnpkg.com/braces/-/braces-3.0.2.tgz#3454e1a462ee8d599e236df336cd9ea4f8afe107" integrity sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A== @@ -2712,15 +2765,14 @@ braces@^3.0.2: fill-range "^7.0.1" browserslist@^4.14.5, browserslist@^4.20.2: - version "4.20.3" - resolved "https://registry.yarnpkg.com/browserslist/-/browserslist-4.20.3.tgz#eb7572f49ec430e054f56d52ff0ebe9be915f8bf" - integrity sha512-NBhymBQl1zM0Y5dQT/O+xiLP9/rzOIQdKM/eMJBAq7yBgaB6krIYLGejrwVYnSHZdqjscB1SPuAjHwxjvN6Wdg== + version "4.21.3" + resolved "https://registry.yarnpkg.com/browserslist/-/browserslist-4.21.3.tgz#5df277694eb3c48bc5c4b05af3e8b7e09c5a6d1a" + integrity sha512-898rgRXLAyRkM1GryrrBHGkqA5hlpkV5MhtZwg9QXeiyLUYs2k00Un05aX5l2/yJIOObYKOpS2JNo8nJDE7fWQ== dependencies: - caniuse-lite "^1.0.30001332" - electron-to-chromium "^1.4.118" - escalade "^3.1.1" - node-releases "^2.0.3" - picocolors "^1.0.0" + caniuse-lite "^1.0.30001370" + electron-to-chromium "^1.4.202" + node-releases "^2.0.6" + update-browserslist-db "^1.0.5" bs-logger@0.x: version "0.2.6" @@ -2746,7 +2798,15 @@ buffer-from@^1.0.0: resolved "https://registry.yarnpkg.com/buffer-from/-/buffer-from-1.1.2.tgz#2b146a6fd72e80b4f55d255f35ed59a3a9a41bd5" integrity sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ== -builtin-modules@^3.0.0: +buffer@^5.5.0: + version "5.7.1" + resolved "https://registry.yarnpkg.com/buffer/-/buffer-5.7.1.tgz#ba62e7c13133053582197160851a8f648e99eed0" + integrity sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ== + dependencies: + base64-js "^1.3.1" + ieee754 "^1.1.13" + +builtin-modules@^3.3.0: version "3.3.0" resolved "https://registry.yarnpkg.com/builtin-modules/-/builtin-modules-3.3.0.tgz#cae62812b89801e9656336e46223e030386be7b6" integrity sha512-zhaCDicdLuWN5UbN5IMnFqNMhNfo919sH85y2/ea+5Yg9TsTkeZxpL+JLbp6cgYFS4sRLp3YV4S6yDuqVWHYOw== @@ -2768,34 +2828,10 @@ byte-size@^7.0.0: resolved "https://registry.yarnpkg.com/byte-size/-/byte-size-7.0.1.tgz#b1daf3386de7ab9d706b941a748dbfc71130dee3" integrity sha512-crQdqyCwhokxwV1UyDzLZanhkugAgft7vt0qbbdt60C6Zf3CAiGmtUCylbtYwrU6loOUw3euGrNtW1J651ot1A== -cacache@^15.0.5, cacache@^15.2.0: - version "15.3.0" - resolved "https://registry.yarnpkg.com/cacache/-/cacache-15.3.0.tgz#dc85380fb2f556fe3dda4c719bfa0ec875a7f1eb" - integrity sha512-VVdYzXEn+cnbXpFgWs5hTT7OScegHVmLhJIR8Ufqk3iFD6A6j5iSX1KuBTfNEv4tdJWE2PzA6IVFtcLC7fN9wQ== - dependencies: - "@npmcli/fs" "^1.0.0" - "@npmcli/move-file" "^1.0.1" - chownr "^2.0.0" - fs-minipass "^2.0.0" - glob "^7.1.4" - infer-owner "^1.0.4" - lru-cache "^6.0.0" - minipass "^3.1.1" - minipass-collect "^1.0.2" - minipass-flush "^1.0.5" - minipass-pipeline "^1.2.2" - mkdirp "^1.0.3" - p-map "^4.0.0" - promise-inflight "^1.0.1" - rimraf "^3.0.2" - ssri "^8.0.1" - tar "^6.0.2" - unique-filename "^1.1.1" - cacache@^16.0.0, cacache@^16.0.6, cacache@^16.1.0: - version "16.1.0" - resolved "https://registry.yarnpkg.com/cacache/-/cacache-16.1.0.tgz#87a6bae558a511c9cb2a13768073e240ca76153a" - integrity sha512-Pk4aQkwCW82A4jGKFvcGkQFqZcMspfP9YWq9Pr87/ldDvlWf718zeI6KWCdKt/jeihu6BytHRUicJPB1K2k8EQ== + version "16.1.1" + resolved 
"https://registry.yarnpkg.com/cacache/-/cacache-16.1.1.tgz#4e79fb91d3efffe0630d5ad32db55cc1b870669c" + integrity sha512-VDKN+LHyCQXaaYZ7rA/qtkURU+/yYhviUdvqEv2LT6QPZU8jpyzEkEVAcKlKLt5dJ5BRp11ym8lo3NKLluEPLg== dependencies: "@npmcli/fs" "^2.1.0" "@npmcli/move-file" "^2.0.0" @@ -2878,10 +2914,10 @@ camelcase@^6.2.0, camelcase@^6.3.0: resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-6.3.0.tgz#5685b95eb209ac9c0c177467778c9c84df58ba9a" integrity sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA== -caniuse-lite@^1.0.30001332: - version "1.0.30001344" - resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001344.tgz#8a1e7fdc4db9c2ec79a05e9fd68eb93a761888bb" - integrity sha512-0ZFjnlCaXNOAYcV7i+TtdKBp0L/3XEU2MF/x6Du1lrh+SRX4IfzIVL4HNJg5pB2PmFb8rszIGyOvsZnqqRoc2g== +caniuse-lite@^1.0.30001370: + version "1.0.30001374" + resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001374.tgz#3dab138e3f5485ba2e74bd13eca7fe1037ce6f57" + integrity sha512-mWvzatRx3w+j5wx/mpFN5v5twlPrabG8NqX2c6e45LCpymdoGqNvRkRutFUqpRTXKFQFNQJasvK0YT7suW6/Hw== chalk@2.x, chalk@^2.0.0, chalk@^2.4.2: version "2.4.2" @@ -2892,7 +2928,15 @@ chalk@2.x, chalk@^2.0.0, chalk@^2.4.2: escape-string-regexp "^1.0.5" supports-color "^5.3.0" -chalk@^4.0.0, chalk@^4.1.0: +chalk@4.1.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.1.0.tgz#4e14870a618d9e2edd97dd8345fd9d9dc315646a" + integrity sha512-qwx12AxXe2Q5xQ43Ac//I6v5aXTipYrSESdOgzrN+9XjgEpyjpKuvSGaN4qE93f7TQTlerQQ8S+EQ0EyDoVL1A== + dependencies: + ansi-styles "^4.1.0" + supports-color "^7.1.0" + +chalk@^4.0.0, chalk@^4.1.0, chalk@^4.1.1: version "4.1.2" resolved "https://registry.yarnpkg.com/chalk/-/chalk-4.1.2.tgz#aac4e2b7734a740867aeb16bf02aad556a1e7a01" integrity sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA== @@ -2929,6 +2973,21 @@ chokidar@^2.0.0: optionalDependencies: fsevents "^1.2.7" +chokidar@^3.5.1: + version "3.5.3" + resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-3.5.3.tgz#1cf37c8707b932bd1af1ae22c0432e2acd1903bd" + integrity sha512-Dr3sfKRP6oTcjf2JmUmFJfeVMvXBdegxB0iVQ5eb2V10uFJUCAS8OByZdVAyVb8xXNz3GjjTgj9kLWsZTqE6kw== + dependencies: + anymatch "~3.1.2" + braces "~3.0.2" + glob-parent "~5.1.2" + is-binary-path "~2.1.0" + is-glob "~4.0.1" + normalize-path "~3.0.0" + readdirp "~3.6.0" + optionalDependencies: + fsevents "~2.3.2" + chownr@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/chownr/-/chownr-2.0.0.tgz#15bfbe53d2eab4cf70f18a8cd68ebe5b3cb1dece" @@ -2944,10 +3003,10 @@ ci-info@^2.0.0: resolved "https://registry.yarnpkg.com/ci-info/-/ci-info-2.0.0.tgz#67a9e964be31a51e15e5010d58e6f12834002f46" integrity sha512-5tK7EtrZ0N+OLFMthtqOj4fI2Jeb88C4CAZPu25LDVUgXJ0A3Js4PMGqrn0JU1W0Mh1/Z8wZzYPxqUrXeBboCQ== -ci-info@^3.2.0, ci-info@^3.3.0: - version "3.3.1" - resolved "https://registry.yarnpkg.com/ci-info/-/ci-info-3.3.1.tgz#58331f6f472a25fe3a50a351ae3052936c2c7f32" - integrity sha512-SXgeMX9VwDe7iFFaEWkA5AstuER9YKqy4EhHqr4DVqkwmD9rpVimkMKWHdjn30Ja45txyjhSn63lVX69eVCckg== +ci-info@^3.2.0, ci-info@^3.3.2: + version "3.3.2" + resolved "https://registry.yarnpkg.com/ci-info/-/ci-info-3.3.2.tgz#6d2967ffa407466481c6c90b6e16b3098f080128" + integrity sha512-xmDt/QIAdeZ9+nfdPsaBCpMvHNLFiLdjj59qjqn+6iPe6YmHGQ35sBnQ8uslRBXFmXkiZQOJRjvQeoGppoTjjg== cjs-module-lexer@^1.0.0: version "1.2.2" @@ -2976,13 +3035,30 @@ clean-stack@^2.0.0: resolved 
"https://registry.yarnpkg.com/clean-stack/-/clean-stack-2.2.0.tgz#ee8472dbb129e727b31e8a10a427dee9dfe4008b" integrity sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A== -cli-cursor@^3.1.0: +clean-stack@^4.0.0: + version "4.2.0" + resolved "https://registry.yarnpkg.com/clean-stack/-/clean-stack-4.2.0.tgz#c464e4cde4ac789f4e0735c5d75beb49d7b30b31" + integrity sha512-LYv6XPxoyODi36Dp976riBtSY27VmFo+MKqEU9QCCWyTrdEPDog+RWA7xQWHi6Vbp61j5c4cdzzX1NidnwtUWg== + dependencies: + escape-string-regexp "5.0.0" + +cli-cursor@3.1.0, cli-cursor@^3.1.0: version "3.1.0" resolved "https://registry.yarnpkg.com/cli-cursor/-/cli-cursor-3.1.0.tgz#264305a7ae490d1d03bf0c9ba7c925d1753af307" integrity sha512-I/zHAwsKf9FqGoXM4WWRACob9+SNukZTd94DWF57E4toouRulbCxcUh6RKUEOQlYTHJnzkPMySvPNaaSLNfLZw== dependencies: restore-cursor "^3.1.0" +cli-spinners@2.6.1: + version "2.6.1" + resolved "https://registry.yarnpkg.com/cli-spinners/-/cli-spinners-2.6.1.tgz#adc954ebe281c37a6319bfa401e6dd2488ffb70d" + integrity sha512-x/5fWmGMnbKQAaNwN+UZlV79qBLM9JFnJuJ03gIi5whrob0xV0ofNVHy9DhwGdsMJQc2OKv0oGmLzvaqvAVv+g== + +cli-spinners@^2.5.0: + version "2.7.0" + resolved "https://registry.yarnpkg.com/cli-spinners/-/cli-spinners-2.7.0.tgz#f815fd30b5f9eaac02db604c7a231ed7cb2f797a" + integrity sha512-qu3pN8Y3qHNgE2AFweciB1IfMnmZ/fsNTEE+NOFjmGB2F/7rLhnhzppvpCnN4FovtP26k8lHyy9ptEbNwWFLzw== + cli-width@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/cli-width/-/cli-width-3.0.0.tgz#a2f48437a2caa9a22436e794bf071ec9e61cedf6" @@ -3044,13 +3120,6 @@ cloneable-readable@^1.0.0: process-nextick-args "^2.0.0" readable-stream "^2.3.5" -cmd-shim@^4.1.0: - version "4.1.0" - resolved "https://registry.yarnpkg.com/cmd-shim/-/cmd-shim-4.1.0.tgz#b3a904a6743e9fede4148c6f3800bf2a08135bdd" - integrity sha512-lb9L7EM4I/ZRVuljLPEtUJOP+xiQVknZ4ZMpMgEp4JzNldPb27HU03hi6K1/6CoIuit/Zm/LQXySErFeXxDprw== - dependencies: - mkdirp-infer-owner "^2.0.0" - cmd-shim@^5.0.0: version "5.0.0" resolved "https://registry.yarnpkg.com/cmd-shim/-/cmd-shim-5.0.0.tgz#8d0aaa1a6b0708630694c4dbde070ed94c707724" @@ -3119,7 +3188,7 @@ color-support@^1.1.3: resolved "https://registry.yarnpkg.com/color-support/-/color-support-1.1.3.tgz#93834379a1cc9a0c61f82f52f0d04322251bd5a2" integrity sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg== -columnify@^1.5.4: +columnify@^1.6.0: version "1.6.0" resolved "https://registry.yarnpkg.com/columnify/-/columnify-1.6.0.tgz#6989531713c9008bb29735e61e37acf5bd553cf3" integrity sha512-lomjuFZKfM6MSAnV9aCZC9sc0qGbmZdfygNv+nCpqVkSKdCxCklLtd16O0EILGkImHw9ZpHkAnHaB+8Zxq5W6Q== @@ -3218,7 +3287,7 @@ config-chain@^1.1.12: ini "^1.3.4" proto-list "~1.2.1" -console-control-strings@^1.0.0, console-control-strings@^1.1.0, console-control-strings@~1.1.0: +console-control-strings@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/console-control-strings/-/console-control-strings-1.1.0.tgz#3d7cf4464db6446ea644bf4b39507f9851008e8e" integrity sha512-ty/fTekppD2fIwRvnZAVdeOiGd1c7YXEixbgJTNzqcxJWKQnjJ/V1bNEEE6hygpM3WjwHFUVK6HTjWSzV4a8sQ== @@ -3231,7 +3300,7 @@ conventional-changelog-angular@^5.0.12: compare-func "^2.0.0" q "^1.5.1" -conventional-changelog-core@^4.2.2: +conventional-changelog-core@^4.2.4: version "4.2.4" resolved "https://registry.yarnpkg.com/conventional-changelog-core/-/conventional-changelog-core-4.2.4.tgz#e50d047e8ebacf63fac3dc67bf918177001e1e9f" integrity 
sha512-gDVS+zVJHE2v4SLc6B0sLsPiloR0ygU7HaDW14aNJE1v4SlqJPILPl/aJC7YdtRE4CybBf8gDwObBvKha8Xlyg== @@ -3486,6 +3555,11 @@ defaults@^1.0.3: dependencies: clone "^1.0.2" +define-lazy-prop@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/define-lazy-prop/-/define-lazy-prop-2.0.0.tgz#3f7ae421129bcaaac9bc74905c98a0009ec9ee7f" + integrity sha512-Ds09qNh8yw3khSjiJjiUInaGX9xlqZDY7JVryGxdxV7NPeuqQfplOpQ66yJFZut3jLa5zOwkXw1g9EI2uKh4Og== + define-properties@^1.1.3: version "1.1.4" resolved "https://registry.yarnpkg.com/define-properties/-/define-properties-1.1.4.tgz#0b14d7bd7fbeb2f3572c3a7eda80ea5d57fb05b1" @@ -3516,27 +3590,27 @@ define-property@^2.0.2: is-descriptor "^1.0.2" isobject "^3.0.1" -del-cli@4.0.1: - version "4.0.1" - resolved "https://registry.yarnpkg.com/del-cli/-/del-cli-4.0.1.tgz#2303ccaa45708ee8c6211568344cf87336abf30a" - integrity sha512-KtR/6cBfZkGDAP2NA7z+bP4p1OMob3wjN9mq13+SWvExx6jT9gFWfLgXEeX8J2B47OKeNCq9yTONmtryQ+m+6g== +del-cli@5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/del-cli/-/del-cli-5.0.0.tgz#fa79fd57e888ecaaf8a468d87e8a175142a24aa9" + integrity sha512-rENFhUaYcjoMODwFhhlON+ogN7DoG+4+GFN+bsA1XeDt4w2OKQnQadFP1thHSAlK9FAtl88qgP66wOV+eFZZiQ== dependencies: - del "^6.0.0" - meow "^10.1.0" + del "^7.0.0" + meow "^10.1.3" -del@^6.0.0: - version "6.1.1" - resolved "https://registry.yarnpkg.com/del/-/del-6.1.1.tgz#3b70314f1ec0aa325c6b14eb36b95786671edb7a" - integrity sha512-ua8BhapfP0JUJKC/zV9yHHDW/rDoDxP4Zhn3AkA6/xT6gY7jYXJiaeyBZznYVujhZZET+UgcbZiQ7sN3WqcImg== +del@^7.0.0: + version "7.0.0" + resolved "https://registry.yarnpkg.com/del/-/del-7.0.0.tgz#79db048bec96f83f344b46c1a66e35d9c09fe8ac" + integrity sha512-tQbV/4u5WVB8HMJr08pgw0b6nG4RGt/tj+7Numvq+zqcvUFeMaIWWOUFltiU+6go8BSO2/ogsB4EasDaj0y68Q== dependencies: - globby "^11.0.1" - graceful-fs "^4.2.4" - is-glob "^4.0.1" - is-path-cwd "^2.2.0" - is-path-inside "^3.0.2" - p-map "^4.0.0" + globby "^13.1.2" + graceful-fs "^4.2.10" + is-glob "^4.0.3" + is-path-cwd "^3.0.0" + is-path-inside "^4.0.0" + p-map "^5.5.0" rimraf "^3.0.2" - slash "^3.0.0" + slash "^4.0.0" delegates@^1.0.0: version "1.0.0" @@ -3586,15 +3660,10 @@ dezalgo@^1.0.0: asap "^2.0.0" wrappy "1" -diff-sequences@^27.5.1: - version "27.5.1" - resolved "https://registry.yarnpkg.com/diff-sequences/-/diff-sequences-27.5.1.tgz#eaecc0d327fd68c8d9672a1e64ab8dccb2ef5327" - integrity sha512-k1gCAXAsNgLwEL+Y8Wvl+M6oEFj5bgazfZULpS5CneoPPXRaCCW7dm+q21Ky2VEE5X+VeRDBVg1Pcvvsr4TtNQ== - -diff-sequences@^28.0.2: - version "28.0.2" - resolved "https://registry.yarnpkg.com/diff-sequences/-/diff-sequences-28.0.2.tgz#40f8d4ffa081acbd8902ba35c798458d0ff1af41" - integrity sha512-YtEoNynLDFCRznv/XDalsKGSZDoj0U5kLnXvY0JSq3nBboRrZXjD81+eSiwi+nzcZDwedMmcowcxNwwgFW23mQ== +diff-sequences@^28.1.1: + version "28.1.1" + resolved "https://registry.yarnpkg.com/diff-sequences/-/diff-sequences-28.1.1.tgz#9989dc731266dc2903457a70e996f3a041913ac6" + integrity sha512-FU0iFaH/E23a+a718l8Qa/19bF9p06kgE0KipMOMadwa3SjnaElKzPaUC0vnibs6/B/9ni97s61mcejk8W1fQw== diff@^4.0.1: version "4.0.2" @@ -3629,6 +3698,11 @@ dot-prop@^6.0.1: dependencies: is-obj "^2.0.0" +dotenv@~10.0.0: + version "10.0.0" + resolved "https://registry.yarnpkg.com/dotenv/-/dotenv-10.0.0.tgz#3d4227b8fb95f81096cdd2b66653fb2c7085ba81" + integrity sha512-rlBi9d8jpv9Sf1klPjNfFAuWDjKLwTIJJ/VxtoTwIR6hnZxcEOQCZg2oIL3MWBYw5GpUDKOEnND7LXTbIpQ03Q== + duplexer@^0.1.1, duplexer@^0.1.2: version "0.1.2" resolved "https://registry.yarnpkg.com/duplexer/-/duplexer-0.1.2.tgz#3abe43aef3835f8ae077d136ddce0f276b0400e6" @@ 
-3652,10 +3726,10 @@ each-props@^1.3.2: is-plain-object "^2.0.1" object.defaults "^1.1.0" -electron-to-chromium@^1.4.118: - version "1.4.142" - resolved "https://registry.yarnpkg.com/electron-to-chromium/-/electron-to-chromium-1.4.142.tgz#70cc8871f7c0122b29256089989e67cee637b40d" - integrity sha512-ea8Q1YX0JRp4GylOmX4gFHIizi0j9GfRW4EkaHnkZp0agRCBB4ZGeCv17IEzIvBkiYVwfoKVhKZJbTfqCRdQdg== +electron-to-chromium@^1.4.202: + version "1.4.211" + resolved "https://registry.yarnpkg.com/electron-to-chromium/-/electron-to-chromium-1.4.211.tgz#afaa8b58313807501312d598d99b953568d60f91" + integrity sha512-BZSbMpyFQU0KBJ1JG26XGeFI3i4op+qOYGxftmZXFZoHkhLgsSv4DHDJfl8ogII3hIuzGt51PaZ195OVu0yJ9A== emittery@^0.10.2: version "0.10.2" @@ -3667,28 +3741,35 @@ emoji-regex@^8.0.0: resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-8.0.0.tgz#e818fd69ce5ccfcb404594f842963bf53164cc37" integrity sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A== -encoding@^0.1.12, encoding@^0.1.13: +encoding@^0.1.13: version "0.1.13" resolved "https://registry.yarnpkg.com/encoding/-/encoding-0.1.13.tgz#56574afdd791f54a8e9b2785c0582a2d26210fa9" integrity sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A== dependencies: iconv-lite "^0.6.2" -end-of-stream@^1.0.0, end-of-stream@^1.1.0: +end-of-stream@^1.0.0, end-of-stream@^1.1.0, end-of-stream@^1.4.1, end-of-stream@^1.4.4: version "1.4.4" resolved "https://registry.yarnpkg.com/end-of-stream/-/end-of-stream-1.4.4.tgz#5ae64a5f45057baf3626ec14da0ca5e4b2431eb0" integrity sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q== dependencies: once "^1.4.0" -enhanced-resolve@^5.9.3: - version "5.9.3" - resolved "https://registry.yarnpkg.com/enhanced-resolve/-/enhanced-resolve-5.9.3.tgz#44a342c012cbc473254af5cc6ae20ebd0aae5d88" - integrity sha512-Bq9VSor+kjvW3f9/MiiR4eE3XYgOl7/rS8lnSxbRbF3kS0B2r+Y9w5krBWxZgDxASVZbdYrn5wT4j/Wb0J9qow== +enhanced-resolve@^5.10.0: + version "5.10.0" + resolved "https://registry.yarnpkg.com/enhanced-resolve/-/enhanced-resolve-5.10.0.tgz#0dc579c3bb2a1032e357ac45b8f3a6f3ad4fb1e6" + integrity sha512-T0yTFjdpldGY8PmuXXR0PyQ1ufZpEGiHVrp7zHKB7jdR4qlmZHhONVM5AQOAWXuF/w3dnHbEQVrNptJgt7F+cQ== dependencies: graceful-fs "^4.2.4" tapable "^2.2.0" +enquirer@~2.3.6: + version "2.3.6" + resolved "https://registry.yarnpkg.com/enquirer/-/enquirer-2.3.6.tgz#2a7fe5dd634a1e4125a975ec994ff5456dc3734d" + integrity sha512-yjNnPr315/FjS4zIsUxYguYUPP2e1NK4d7E7ZOLiyYCcbFBiTMyID+2wvm2w6+pZ/odMA7cRkjhsPbltwBOrLg== + dependencies: + ansi-colors "^4.1.1" + env-paths@^2.2.0: version "2.2.1" resolved "https://registry.yarnpkg.com/env-paths/-/env-paths-2.2.1.tgz#420399d416ce1fbe9bc0a07c62fa68d67fd0f8f2" @@ -3724,9 +3805,9 @@ es-module-lexer@^0.9.0: integrity sha512-1HQ2M2sPtxwnvOvT1ZClHyQDiggdNjURWpY2we6aMKCQiUVxTmVs2UYPLIrD84sS+kMdUwfBSylbJPwNnBrnHQ== es5-ext@^0.10.35, es5-ext@^0.10.46, es5-ext@^0.10.50, es5-ext@^0.10.53, es5-ext@~0.10.14, es5-ext@~0.10.2, es5-ext@~0.10.46: - version "0.10.61" - resolved "https://registry.yarnpkg.com/es5-ext/-/es5-ext-0.10.61.tgz#311de37949ef86b6b0dcea894d1ffedb909d3269" - integrity sha512-yFhIqQAzu2Ca2I4SE2Au3rxVfmohU9Y7wqGR+s7+H7krk26NXhIRAZDgqd6xqjCEFUomDEA3/Bo/7fKmIkW1kA== + version "0.10.62" + resolved "https://registry.yarnpkg.com/es5-ext/-/es5-ext-0.10.62.tgz#5e6adc19a6da524bf3d1e02bbc8960e5eb49a9a5" + integrity sha512-BHLqn0klhEpnOKSrzn/Xsz2UIW8j+cGmo9JLzr8BiUapV8hPL9+FliFqjwr9ngW7jWdnxv6eO+/LqyhJVqgrjA== dependencies: 
es6-iterator "^2.0.3" es6-symbol "^3.1.3" @@ -3759,142 +3840,148 @@ es6-weak-map@^2.0.1, es6-weak-map@^2.0.3: es6-iterator "^2.0.3" es6-symbol "^3.1.1" -esbuild-android-64@0.14.42: - version "0.14.42" - resolved "https://registry.yarnpkg.com/esbuild-android-64/-/esbuild-android-64-0.14.42.tgz#d7ab3d44d3671218d22bce52f65642b12908d954" - integrity sha512-P4Y36VUtRhK/zivqGVMqhptSrFILAGlYp0Z8r9UQqHJ3iWztRCNWnlBzD9HRx0DbueXikzOiwyOri+ojAFfW6A== - -esbuild-android-arm64@0.14.42: - version "0.14.42" - resolved "https://registry.yarnpkg.com/esbuild-android-arm64/-/esbuild-android-arm64-0.14.42.tgz#45336d8bec49abddb3a022996a23373f45a57c27" - integrity sha512-0cOqCubq+RWScPqvtQdjXG3Czb3AWI2CaKw3HeXry2eoA2rrPr85HF7IpdU26UWdBXgPYtlTN1LUiuXbboROhg== - -esbuild-darwin-64@0.14.42: - version "0.14.42" - resolved "https://registry.yarnpkg.com/esbuild-darwin-64/-/esbuild-darwin-64-0.14.42.tgz#6dff5e44cd70a88c33323e2f5fb598e40c68a9e0" - integrity sha512-ipiBdCA3ZjYgRfRLdQwP82rTiv/YVMtW36hTvAN5ZKAIfxBOyPXY7Cejp3bMXWgzKD8B6O+zoMzh01GZsCuEIA== - -esbuild-darwin-arm64@0.14.42: - version "0.14.42" - resolved "https://registry.yarnpkg.com/esbuild-darwin-arm64/-/esbuild-darwin-arm64-0.14.42.tgz#2c7313e1b12d2fa5b889c03213d682fb92ca8c4f" - integrity sha512-bU2tHRqTPOaoH/4m0zYHbFWpiYDmaA0gt90/3BMEFaM0PqVK/a6MA2V/ypV5PO0v8QxN6gH5hBPY4YJ2lopXgA== - -esbuild-freebsd-64@0.14.42: - version "0.14.42" - resolved "https://registry.yarnpkg.com/esbuild-freebsd-64/-/esbuild-freebsd-64-0.14.42.tgz#ad1c5a564a7e473b8ce95ee7f76618d05d6daffc" - integrity sha512-75h1+22Ivy07+QvxHyhVqOdekupiTZVLN1PMwCDonAqyXd8TVNJfIRFrdL8QmSJrOJJ5h8H1I9ETyl2L8LQDaw== - -esbuild-freebsd-arm64@0.14.42: - version "0.14.42" - resolved "https://registry.yarnpkg.com/esbuild-freebsd-arm64/-/esbuild-freebsd-arm64-0.14.42.tgz#4bdb480234144f944f1930829bace7561135ddc7" - integrity sha512-W6Jebeu5TTDQMJUJVarEzRU9LlKpNkPBbjqSu+GUPTHDCly5zZEQq9uHkmHHl7OKm+mQ2zFySN83nmfCeZCyNA== - -esbuild-linux-32@0.14.42: - version "0.14.42" - resolved "https://registry.yarnpkg.com/esbuild-linux-32/-/esbuild-linux-32-0.14.42.tgz#ef18fd19f067e9d2b5f677d6b82fa81519f5a8c2" - integrity sha512-Ooy/Bj+mJ1z4jlWcK5Dl6SlPlCgQB9zg1UrTCeY8XagvuWZ4qGPyYEWGkT94HUsRi2hKsXvcs6ThTOjBaJSMfg== - -esbuild-linux-64@0.14.42: - version "0.14.42" - resolved "https://registry.yarnpkg.com/esbuild-linux-64/-/esbuild-linux-64-0.14.42.tgz#d84e7333b1c1b22cf8b5b9dbb5dd9b2ecb34b79f" - integrity sha512-2L0HbzQfbTuemUWfVqNIjOfaTRt9zsvjnme6lnr7/MO9toz/MJ5tZhjqrG6uDWDxhsaHI2/nsDgrv8uEEN2eoA== - -esbuild-linux-arm64@0.14.42: - version "0.14.42" - resolved "https://registry.yarnpkg.com/esbuild-linux-arm64/-/esbuild-linux-arm64-0.14.42.tgz#dc19e282f8c4ffbaa470c02a4d171e4ae0180cca" - integrity sha512-c3Ug3e9JpVr8jAcfbhirtpBauLxzYPpycjWulD71CF6ZSY26tvzmXMJYooQ2YKqDY4e/fPu5K8bm7MiXMnyxuA== - -esbuild-linux-arm@0.14.42: - version "0.14.42" - resolved "https://registry.yarnpkg.com/esbuild-linux-arm/-/esbuild-linux-arm-0.14.42.tgz#d49870e63e2242b8156bf473f2ee5154226be328" - integrity sha512-STq69yzCMhdRaWnh29UYrLSr/qaWMm/KqwaRF1pMEK7kDiagaXhSL1zQGXbYv94GuGY/zAwzK98+6idCMUOOCg== - -esbuild-linux-mips64le@0.14.42: - version "0.14.42" - resolved "https://registry.yarnpkg.com/esbuild-linux-mips64le/-/esbuild-linux-mips64le-0.14.42.tgz#f4e6ff9bf8a6f175470498826f48d093b054fc22" - integrity sha512-QuvpHGbYlkyXWf2cGm51LBCHx6eUakjaSrRpUqhPwjh/uvNUYvLmz2LgPTTPwCqaKt0iwL+OGVL0tXA5aDbAbg== - -esbuild-linux-ppc64le@0.14.42: - version "0.14.42" - resolved 
"https://registry.yarnpkg.com/esbuild-linux-ppc64le/-/esbuild-linux-ppc64le-0.14.42.tgz#ac9c66fc80ba9f8fda15a4cc08f4e55f6c0aed63" - integrity sha512-8ohIVIWDbDT+i7lCx44YCyIRrOW1MYlks9fxTo0ME2LS/fxxdoJBwHWzaDYhjvf8kNpA+MInZvyOEAGoVDrMHg== - -esbuild-linux-riscv64@0.14.42: - version "0.14.42" - resolved "https://registry.yarnpkg.com/esbuild-linux-riscv64/-/esbuild-linux-riscv64-0.14.42.tgz#21e0ae492a3a9bf4eecbfc916339a66e204256d0" - integrity sha512-DzDqK3TuoXktPyG1Lwx7vhaF49Onv3eR61KwQyxYo4y5UKTpL3NmuarHSIaSVlTFDDpcIajCDwz5/uwKLLgKiQ== - -esbuild-linux-s390x@0.14.42: - version "0.14.42" - resolved "https://registry.yarnpkg.com/esbuild-linux-s390x/-/esbuild-linux-s390x-0.14.42.tgz#06d40b957250ffd9a2183bfdfc9a03d6fd21b3e8" - integrity sha512-YFRhPCxl8nb//Wn6SiS5pmtplBi4z9yC2gLrYoYI/tvwuB1jldir9r7JwAGy1Ck4D7sE7wBN9GFtUUX/DLdcEQ== - -esbuild-netbsd-64@0.14.42: - version "0.14.42" - resolved "https://registry.yarnpkg.com/esbuild-netbsd-64/-/esbuild-netbsd-64-0.14.42.tgz#185664f05f10914f14ed43bd9e22b7de584267f7" - integrity sha512-QYSD2k+oT9dqB/4eEM9c+7KyNYsIPgzYOSrmfNGDIyJrbT1d+CFVKvnKahDKNJLfOYj8N4MgyFaU9/Ytc6w5Vw== - -esbuild-openbsd-64@0.14.42: - version "0.14.42" - resolved "https://registry.yarnpkg.com/esbuild-openbsd-64/-/esbuild-openbsd-64-0.14.42.tgz#c29006f659eb4e55283044bbbd4eb4054fae8839" - integrity sha512-M2meNVIKWsm2HMY7+TU9AxM7ZVwI9havdsw6m/6EzdXysyCFFSoaTQ/Jg03izjCsK17FsVRHqRe26Llj6x0MNA== +esbuild-android-64@0.14.53: + version "0.14.53" + resolved "https://registry.yarnpkg.com/esbuild-android-64/-/esbuild-android-64-0.14.53.tgz#259bc3ef1399a3cad8f4f67c40ee20779c4de675" + integrity sha512-fIL93sOTnEU+NrTAVMIKiAw0YH22HWCAgg4N4Z6zov2t0kY9RAJ50zY9ZMCQ+RT6bnOfDt8gCTnt/RaSNA2yRA== + +esbuild-android-arm64@0.14.53: + version "0.14.53" + resolved "https://registry.yarnpkg.com/esbuild-android-arm64/-/esbuild-android-arm64-0.14.53.tgz#2158253d4e8f9fdd2a081bbb4f73b8806178841e" + integrity sha512-PC7KaF1v0h/nWpvlU1UMN7dzB54cBH8qSsm7S9mkwFA1BXpaEOufCg8hdoEI1jep0KeO/rjZVWrsH8+q28T77A== + +esbuild-darwin-64@0.14.53: + version "0.14.53" + resolved "https://registry.yarnpkg.com/esbuild-darwin-64/-/esbuild-darwin-64-0.14.53.tgz#b4681831fd8f8d06feb5048acbe90d742074cc2a" + integrity sha512-gE7P5wlnkX4d4PKvLBUgmhZXvL7lzGRLri17/+CmmCzfncIgq8lOBvxGMiQ4xazplhxq+72TEohyFMZLFxuWvg== + +esbuild-darwin-arm64@0.14.53: + version "0.14.53" + resolved "https://registry.yarnpkg.com/esbuild-darwin-arm64/-/esbuild-darwin-arm64-0.14.53.tgz#d267d957852d121b261b3f76ead86e5b5463acc9" + integrity sha512-otJwDU3hnI15Q98PX4MJbknSZ/WSR1I45il7gcxcECXzfN4Mrpft5hBDHXNRnCh+5858uPXBXA1Vaz2jVWLaIA== + +esbuild-freebsd-64@0.14.53: + version "0.14.53" + resolved "https://registry.yarnpkg.com/esbuild-freebsd-64/-/esbuild-freebsd-64-0.14.53.tgz#aca2af6d72b537fe66a38eb8f374fb66d4c98ca0" + integrity sha512-WkdJa8iyrGHyKiPF4lk0MiOF87Q2SkE+i+8D4Cazq3/iqmGPJ6u49je300MFi5I2eUsQCkaOWhpCVQMTKGww2w== + +esbuild-freebsd-arm64@0.14.53: + version "0.14.53" + resolved "https://registry.yarnpkg.com/esbuild-freebsd-arm64/-/esbuild-freebsd-arm64-0.14.53.tgz#76282e19312d914c34343c8a7da6cc5f051580b9" + integrity sha512-9T7WwCuV30NAx0SyQpw8edbKvbKELnnm1FHg7gbSYaatH+c8WJW10g/OdM7JYnv7qkimw2ZTtSA+NokOLd2ydQ== + +esbuild-linux-32@0.14.53: + version "0.14.53" + resolved "https://registry.yarnpkg.com/esbuild-linux-32/-/esbuild-linux-32-0.14.53.tgz#1045d34cf7c5faaf2af3b29cc1573b06580c37e5" + integrity sha512-VGanLBg5en2LfGDgLEUxQko2lqsOS7MTEWUi8x91YmsHNyzJVT/WApbFFx3MQGhkf+XdimVhpyo5/G0PBY91zg== + +esbuild-linux-64@0.14.53: + version "0.14.53" + resolved 
"https://registry.yarnpkg.com/esbuild-linux-64/-/esbuild-linux-64-0.14.53.tgz#ab3f2ee2ebb5a6930c72d9539cb34b428808cbe4" + integrity sha512-pP/FA55j/fzAV7N9DF31meAyjOH6Bjuo3aSKPh26+RW85ZEtbJv9nhoxmGTd9FOqjx59Tc1ZbrJabuiXlMwuZQ== + +esbuild-linux-arm64@0.14.53: + version "0.14.53" + resolved "https://registry.yarnpkg.com/esbuild-linux-arm64/-/esbuild-linux-arm64-0.14.53.tgz#1f5530412f6690949e78297122350488d3266cfe" + integrity sha512-GDmWITT+PMsjCA6/lByYk7NyFssW4Q6in32iPkpjZ/ytSyH+xeEx8q7HG3AhWH6heemEYEWpTll/eui3jwlSnw== + +esbuild-linux-arm@0.14.53: + version "0.14.53" + resolved "https://registry.yarnpkg.com/esbuild-linux-arm/-/esbuild-linux-arm-0.14.53.tgz#a44ec9b5b42007ab6c0d65a224ccc6bbd97c54cf" + integrity sha512-/u81NGAVZMopbmzd21Nu/wvnKQK3pT4CrvQ8BTje1STXcQAGnfyKgQlj3m0j2BzYbvQxSy+TMck4TNV2onvoPA== + +esbuild-linux-mips64le@0.14.53: + version "0.14.53" + resolved "https://registry.yarnpkg.com/esbuild-linux-mips64le/-/esbuild-linux-mips64le-0.14.53.tgz#a4d0b6b17cfdeea4e41b0b085a5f73d99311be9f" + integrity sha512-d6/XHIQW714gSSp6tOOX2UscedVobELvQlPMkInhx1NPz4ThZI9uNLQ4qQJHGBGKGfu+rtJsxM4NVHLhnNRdWQ== + +esbuild-linux-ppc64le@0.14.53: + version "0.14.53" + resolved "https://registry.yarnpkg.com/esbuild-linux-ppc64le/-/esbuild-linux-ppc64le-0.14.53.tgz#8c331822c85465434e086e3e6065863770c38139" + integrity sha512-ndnJmniKPCB52m+r6BtHHLAOXw+xBCWIxNnedbIpuREOcbSU/AlyM/2dA3BmUQhsHdb4w3amD5U2s91TJ3MzzA== + +esbuild-linux-riscv64@0.14.53: + version "0.14.53" + resolved "https://registry.yarnpkg.com/esbuild-linux-riscv64/-/esbuild-linux-riscv64-0.14.53.tgz#36fd75543401304bea8a2d63bf8ea18aaa508e00" + integrity sha512-yG2sVH+QSix6ct4lIzJj329iJF3MhloLE6/vKMQAAd26UVPVkhMFqFopY+9kCgYsdeWvXdPgmyOuKa48Y7+/EQ== + +esbuild-linux-s390x@0.14.53: + version "0.14.53" + resolved "https://registry.yarnpkg.com/esbuild-linux-s390x/-/esbuild-linux-s390x-0.14.53.tgz#1622677ab6824123f48f75d3afc031cd41936129" + integrity sha512-OCJlgdkB+XPYndHmw6uZT7jcYgzmx9K+28PVdOa/eLjdoYkeAFvH5hTwX4AXGLZLH09tpl4bVsEtvuyUldaNCg== + +esbuild-netbsd-64@0.14.53: + version "0.14.53" + resolved "https://registry.yarnpkg.com/esbuild-netbsd-64/-/esbuild-netbsd-64-0.14.53.tgz#e86d0efd0116658be335492ed12e66b26b4baf52" + integrity sha512-gp2SB+Efc7MhMdWV2+pmIs/Ja/Mi5rjw+wlDmmbIn68VGXBleNgiEZG+eV2SRS0kJEUyHNedDtwRIMzaohWedQ== + +esbuild-openbsd-64@0.14.53: + version "0.14.53" + resolved "https://registry.yarnpkg.com/esbuild-openbsd-64/-/esbuild-openbsd-64-0.14.53.tgz#9bcbbe6f86304872c6e91f64c8eb73fc29c3588b" + integrity sha512-eKQ30ZWe+WTZmteDYg8S+YjHV5s4iTxeSGhJKJajFfQx9TLZJvsJX0/paqwP51GicOUruFpSUAs2NCc0a4ivQQ== esbuild-plugin-alias@0.2.1: version "0.2.1" resolved "https://registry.yarnpkg.com/esbuild-plugin-alias/-/esbuild-plugin-alias-0.2.1.tgz#45a86cb941e20e7c2bc68a2bea53562172494fcb" integrity sha512-jyfL/pwPqaFXyKnj8lP8iLk6Z0m099uXR45aSN8Av1XD4vhvQutxxPzgA2bTcAwQpa1zCXDcWOlhFgyP3GKqhQ== -esbuild-sunos-64@0.14.42: - version "0.14.42" - resolved "https://registry.yarnpkg.com/esbuild-sunos-64/-/esbuild-sunos-64-0.14.42.tgz#aa9eec112cd1e7105e7bb37000eca7d460083f8f" - integrity sha512-uXV8TAZEw36DkgW8Ak3MpSJs1ofBb3Smkc/6pZ29sCAN1KzCAQzsje4sUwugf+FVicrHvlamCOlFZIXgct+iqQ== - -esbuild-windows-32@0.14.42: - version "0.14.42" - resolved "https://registry.yarnpkg.com/esbuild-windows-32/-/esbuild-windows-32-0.14.42.tgz#c3fc450853c61a74dacc5679de301db23b73e61e" - integrity sha512-4iw/8qWmRICWi9ZOnJJf9sYt6wmtp3hsN4TdI5NqgjfOkBVMxNdM9Vt3626G1Rda9ya2Q0hjQRD9W1o+m6Lz6g== - -esbuild-windows-64@0.14.42: - version "0.14.42" - resolved 
"https://registry.yarnpkg.com/esbuild-windows-64/-/esbuild-windows-64-0.14.42.tgz#b877aa37ff47d9fcf0ccb1ca6a24b31475a5e555" - integrity sha512-j3cdK+Y3+a5H0wHKmLGTJcq0+/2mMBHPWkItR3vytp/aUGD/ua/t2BLdfBIzbNN9nLCRL9sywCRpOpFMx3CxzA== - -esbuild-windows-arm64@0.14.42: - version "0.14.42" - resolved "https://registry.yarnpkg.com/esbuild-windows-arm64/-/esbuild-windows-arm64-0.14.42.tgz#79da8744626f24bc016dc40d016950b5a4a2bac5" - integrity sha512-+lRAARnF+hf8J0mN27ujO+VbhPbDqJ8rCcJKye4y7YZLV6C4n3pTRThAb388k/zqF5uM0lS5O201u0OqoWSicw== - -esbuild@0.14.42, esbuild@^0.14.38: - version "0.14.42" - resolved "https://registry.yarnpkg.com/esbuild/-/esbuild-0.14.42.tgz#98587df0b024d5f6341b12a1d735a2bff55e1836" - integrity sha512-V0uPZotCEHokJdNqyozH6qsaQXqmZEOiZWrXnds/zaH/0SyrIayRXWRB98CENO73MIZ9T3HBIOsmds5twWtmgw== +esbuild-sunos-64@0.14.53: + version "0.14.53" + resolved "https://registry.yarnpkg.com/esbuild-sunos-64/-/esbuild-sunos-64-0.14.53.tgz#f7a872f7460bfb7b131f7188a95fbce3d1c577e8" + integrity sha512-OWLpS7a2FrIRukQqcgQqR1XKn0jSJoOdT+RlhAxUoEQM/IpytS3FXzCJM6xjUYtpO5GMY0EdZJp+ur2pYdm39g== + +esbuild-windows-32@0.14.53: + version "0.14.53" + resolved "https://registry.yarnpkg.com/esbuild-windows-32/-/esbuild-windows-32-0.14.53.tgz#c5e3ca50e2d1439cc2c9fe4defa63bcd474ce709" + integrity sha512-m14XyWQP5rwGW0tbEfp95U6A0wY0DYPInWBB7D69FAXUpBpBObRoGTKRv36lf2RWOdE4YO3TNvj37zhXjVL5xg== + +esbuild-windows-64@0.14.53: + version "0.14.53" + resolved "https://registry.yarnpkg.com/esbuild-windows-64/-/esbuild-windows-64-0.14.53.tgz#ec2ab4a60c5215f092ffe1eab6d01319e88238af" + integrity sha512-s9skQFF0I7zqnQ2K8S1xdLSfZFsPLuOGmSx57h2btSEswv0N0YodYvqLcJMrNMXh6EynOmWD7rz+0rWWbFpIHQ== + +esbuild-windows-arm64@0.14.53: + version "0.14.53" + resolved "https://registry.yarnpkg.com/esbuild-windows-arm64/-/esbuild-windows-arm64-0.14.53.tgz#f71d403806bdf9f4a1f9d097db9aec949bd675c8" + integrity sha512-E+5Gvb+ZWts+00T9II6wp2L3KG2r3iGxByqd/a1RmLmYWVsSVUjkvIxZuJ3hYTIbhLkH5PRwpldGTKYqVz0nzQ== + +esbuild@0.14.53, esbuild@^0.14.47: + version "0.14.53" + resolved "https://registry.yarnpkg.com/esbuild/-/esbuild-0.14.53.tgz#20b1007f686e8584f2a01a1bec5a37aac9498ce4" + integrity sha512-ohO33pUBQ64q6mmheX1mZ8mIXj8ivQY/L4oVuAshr+aJI+zLl+amrp3EodrUNDNYVrKJXGPfIHFGhO8slGRjuw== optionalDependencies: - esbuild-android-64 "0.14.42" - esbuild-android-arm64 "0.14.42" - esbuild-darwin-64 "0.14.42" - esbuild-darwin-arm64 "0.14.42" - esbuild-freebsd-64 "0.14.42" - esbuild-freebsd-arm64 "0.14.42" - esbuild-linux-32 "0.14.42" - esbuild-linux-64 "0.14.42" - esbuild-linux-arm "0.14.42" - esbuild-linux-arm64 "0.14.42" - esbuild-linux-mips64le "0.14.42" - esbuild-linux-ppc64le "0.14.42" - esbuild-linux-riscv64 "0.14.42" - esbuild-linux-s390x "0.14.42" - esbuild-netbsd-64 "0.14.42" - esbuild-openbsd-64 "0.14.42" - esbuild-sunos-64 "0.14.42" - esbuild-windows-32 "0.14.42" - esbuild-windows-64 "0.14.42" - esbuild-windows-arm64 "0.14.42" + "@esbuild/linux-loong64" "0.14.53" + esbuild-android-64 "0.14.53" + esbuild-android-arm64 "0.14.53" + esbuild-darwin-64 "0.14.53" + esbuild-darwin-arm64 "0.14.53" + esbuild-freebsd-64 "0.14.53" + esbuild-freebsd-arm64 "0.14.53" + esbuild-linux-32 "0.14.53" + esbuild-linux-64 "0.14.53" + esbuild-linux-arm "0.14.53" + esbuild-linux-arm64 "0.14.53" + esbuild-linux-mips64le "0.14.53" + esbuild-linux-ppc64le "0.14.53" + esbuild-linux-riscv64 "0.14.53" + esbuild-linux-s390x "0.14.53" + esbuild-netbsd-64 "0.14.53" + esbuild-openbsd-64 "0.14.53" + esbuild-sunos-64 "0.14.53" + esbuild-windows-32 "0.14.53" + esbuild-windows-64 
"0.14.53" + esbuild-windows-arm64 "0.14.53" escalade@^3.1.1: version "3.1.1" resolved "https://registry.yarnpkg.com/escalade/-/escalade-3.1.1.tgz#d8cfdc7000965c5a0174b4a82eaa5c0552742e40" integrity sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw== +escape-string-regexp@5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-5.0.0.tgz#4683126b500b61762f2dbebace1806e8be31b1c8" + integrity sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw== + escape-string-regexp@^1.0.3, escape-string-regexp@^1.0.5: version "1.0.5" resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz#1b61c0562190a8dff6ae3bb2cf0200ca130b86d4" @@ -3910,20 +3997,20 @@ escape-string-regexp@^4.0.0: resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz#14ba83a5d373e3d311e5afca29cf5bfad965bf34" integrity sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA== -eslint-plugin-jest@26.4.6: - version "26.4.6" - resolved "https://registry.yarnpkg.com/eslint-plugin-jest/-/eslint-plugin-jest-26.4.6.tgz#9d8184c1ecf077722a20cc236c7e14f4e263606f" - integrity sha512-R3mq1IepnhtsukHQsWxdyKra3OVwYB+N4k8i45ndqSfr8p9KZV6G+EIUt1Z7hzAh4KlsbXG+nCTlNeGFLFLNvA== +eslint-plugin-jest@26.7.0: + version "26.7.0" + resolved "https://registry.yarnpkg.com/eslint-plugin-jest/-/eslint-plugin-jest-26.7.0.tgz#41d405ac9143e1284a3401282db47ed459436778" + integrity sha512-/YNitdfG3o3cC6juZziAdkk6nfJt01jXVfj4AgaYVLs7bupHzRDL5K+eipdzhDXtQsiqaX1TzfwSuRlEgeln1A== dependencies: "@typescript-eslint/utils" "^5.10.0" -eslint-plugin-unicorn@42.0.0: - version "42.0.0" - resolved "https://registry.yarnpkg.com/eslint-plugin-unicorn/-/eslint-plugin-unicorn-42.0.0.tgz#47d60c00c263ad743403b052db689e39acbacff1" - integrity sha512-ixBsbhgWuxVaNlPTT8AyfJMlhyC5flCJFjyK3oKE8TRrwBnaHvUbuIkCM1lqg8ryYrFStL/T557zfKzX4GKSlg== +eslint-plugin-unicorn@43.0.2: + version "43.0.2" + resolved "https://registry.yarnpkg.com/eslint-plugin-unicorn/-/eslint-plugin-unicorn-43.0.2.tgz#b189d58494c8a0985a4b89dba5dbfde3ad7575a5" + integrity sha512-DtqZ5mf/GMlfWoz1abIjq5jZfaFuHzGBZYIeuJfEoKKGWRHr2JiJR+ea+BF7Wx2N1PPRoT/2fwgiK1NnmNE3Hg== dependencies: - "@babel/helper-validator-identifier" "^7.15.7" - ci-info "^3.3.0" + "@babel/helper-validator-identifier" "^7.18.6" + ci-info "^3.3.2" clean-regexp "^1.0.0" eslint-utils "^3.0.0" esquery "^1.4.0" @@ -3934,7 +4021,7 @@ eslint-plugin-unicorn@42.0.0: read-pkg-up "^7.0.1" regexp-tree "^0.1.24" safe-regex "^2.1.1" - semver "^7.3.5" + semver "^7.3.7" strip-indent "^3.0.0" eslint-scope@5.1.1, eslint-scope@^5.1.1: @@ -3970,13 +4057,14 @@ eslint-visitor-keys@^3.3.0: resolved "https://registry.yarnpkg.com/eslint-visitor-keys/-/eslint-visitor-keys-3.3.0.tgz#f6480fa6b1f30efe2d1968aa8ac745b862469826" integrity sha512-mQ+suqKJVyeuwGYHAdjMFqjCyfl8+Ldnxuyp3ldiMBFKkvytrXUZWaiPCEav8qDHKty44bD+qV1IP4T+w+xXRA== -eslint@8.16.0: - version "8.16.0" - resolved "https://registry.yarnpkg.com/eslint/-/eslint-8.16.0.tgz#6d936e2d524599f2a86c708483b4c372c5d3bbae" - integrity sha512-MBndsoXY/PeVTDJeWsYj7kLZ5hQpJOfMYLsF6LicLHQWbRDG19lK5jOix4DPl8yY4SUFcE3txy86OzFLWT+yoA== +eslint@8.21.0: + version "8.21.0" + resolved "https://registry.yarnpkg.com/eslint/-/eslint-8.21.0.tgz#1940a68d7e0573cef6f50037addee295ff9be9ef" + integrity sha512-/XJ1+Qurf1T9G2M5IHrsjp+xrGT73RZf23xA1z5wB1ZzzEAWSZKvRwhWxTFp1rvkvCfwcvAUNAP31bhKTTGfDA== dependencies: 
"@eslint/eslintrc" "^1.3.0" - "@humanwhocodes/config-array" "^0.9.2" + "@humanwhocodes/config-array" "^0.10.4" + "@humanwhocodes/gitignore-to-minimatch" "^1.0.2" ajv "^6.10.0" chalk "^4.0.0" cross-spawn "^7.0.2" @@ -3986,14 +4074,17 @@ eslint@8.16.0: eslint-scope "^7.1.1" eslint-utils "^3.0.0" eslint-visitor-keys "^3.3.0" - espree "^9.3.2" + espree "^9.3.3" esquery "^1.4.0" esutils "^2.0.2" fast-deep-equal "^3.1.3" file-entry-cache "^6.0.1" + find-up "^5.0.0" functional-red-black-tree "^1.0.1" glob-parent "^6.0.1" globals "^13.15.0" + globby "^11.1.0" + grapheme-splitter "^1.0.4" ignore "^5.2.0" import-fresh "^3.0.0" imurmurhash "^0.1.4" @@ -4015,12 +4106,12 @@ eslint@8.16.0: version "3.2.25" resolved "https://github.com/jsg2021/esm/releases/download/v3.x.x-pr883/esm-3.x.x-pr883.tgz#c463cfa4e14aceea6b7cd7e669ef90de072ea60a" -espree@^9.3.2: - version "9.3.2" - resolved "https://registry.yarnpkg.com/espree/-/espree-9.3.2.tgz#f58f77bd334731182801ced3380a8cc859091596" - integrity sha512-D211tC7ZwouTIuY5x9XnS0E9sWNChB7IYKX/Xp5eQj3nFXhqmiUDB9q27y76oFl8jTg3pXcQx/bpxMfs3CIZbA== +espree@^9.3.2, espree@^9.3.3: + version "9.3.3" + resolved "https://registry.yarnpkg.com/espree/-/espree-9.3.3.tgz#2dd37c4162bb05f433ad3c1a52ddf8a49dc08e9d" + integrity sha512-ORs1Rt/uQTqUKjDdGCyrtYxbazf5umATSf/K4qxjmZHORR6HJk+2s/2Pqe+Kk49HHINC/xNIrGfgh8sZcll0ng== dependencies: - acorn "^8.7.1" + acorn "^8.8.0" acorn-jsx "^5.3.2" eslint-visitor-keys "^3.3.0" @@ -4121,16 +4212,16 @@ expand-tilde@^2.0.0, expand-tilde@^2.0.2: dependencies: homedir-polyfill "^1.0.1" -expect@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/expect/-/expect-28.1.0.tgz#10e8da64c0850eb8c39a480199f14537f46e8360" - integrity sha512-qFXKl8Pmxk8TBGfaFKRtcQjfXEnKAs+dmlxdwvukJZorwrAabT7M3h8oLOG01I2utEhkmUTi17CHaPBovZsKdw== +expect@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/expect/-/expect-28.1.3.tgz#90a7c1a124f1824133dd4533cce2d2bdcb6603ec" + integrity sha512-eEh0xn8HlsuOBxFgIss+2mX85VAS4Qy3OSkjV7rlBWljtA4oWH37glVGyOZSZvErDT/yBywZdPGwCXuTvSG85g== dependencies: - "@jest/expect-utils" "^28.1.0" + "@jest/expect-utils" "^28.1.3" jest-get-type "^28.0.2" - jest-matcher-utils "^28.1.0" - jest-message-util "^28.1.0" - jest-util "^28.1.0" + jest-matcher-utils "^28.1.3" + jest-message-util "^28.1.3" + jest-util "^28.1.3" ext@^1.1.2: version "1.6.0" @@ -4197,7 +4288,18 @@ fast-deep-equal@^3.1.1, fast-deep-equal@^3.1.3: resolved "https://registry.yarnpkg.com/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz#3a7d56b559d6cbc3eb512325244e619a65c6c525" integrity sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q== -fast-glob@^3.2.9: +fast-glob@3.2.7: + version "3.2.7" + resolved "https://registry.yarnpkg.com/fast-glob/-/fast-glob-3.2.7.tgz#fd6cb7a2d7e9aa7a7846111e85a196d6b2f766a1" + integrity sha512-rYGMRwip6lUMvYD3BTScMwT1HtAs2d71SMv66Vrxs0IekGZEjhM0pcMfjQPnknBt2zeCwQMEupiN02ZP4DiT1Q== + dependencies: + "@nodelib/fs.stat" "^2.0.2" + "@nodelib/fs.walk" "^1.2.3" + glob-parent "^5.1.2" + merge2 "^1.3.0" + micromatch "^4.0.4" + +fast-glob@^3.2.11, fast-glob@^3.2.9: version "3.2.11" resolved "https://registry.yarnpkg.com/fast-glob/-/fast-glob-3.2.11.tgz#a1172ad95ceb8a16e20caa5c5e56480e5129c1d9" integrity sha512-xrO3+1bxSo3ZVHAnqzyuewYT6aMFHRAd4Kcs92MAonjwQZLsK9d0SF1IyQ3k5PoirxTW0Oe/RqFgMQ6TcNE5Ew== @@ -4237,7 +4339,7 @@ fb-watchman@^2.0.0: dependencies: bser "2.1.1" -figures@^3.0.0: +figures@3.2.0, figures@^3.0.0: version "3.2.0" resolved 
"https://registry.yarnpkg.com/figures/-/figures-3.2.0.tgz#625c18bd293c604dc4a8ddb2febf0c88341746af" integrity sha512-yaduQFRKLXYOGgEn6AZau90j3ggSOyiqXU0F9JZfeXYhNa+Jk4X+s45A2zg5jns87GAFa34BBm2kXw4XpNcbdg== @@ -4278,11 +4380,6 @@ fill-range@^7.0.1: dependencies: to-regex-range "^5.0.1" -filter-obj@^1.1.0: - version "1.1.0" - resolved "https://registry.yarnpkg.com/filter-obj/-/filter-obj-1.1.0.tgz#9b311112bc6c6127a16e016c6c5d7f19e0805c5b" - integrity sha512-8rXg1ZnX7xzy2NGDVkBVaAy+lSlPNwad13BtgSlLuxfIslyt5Vg64U7tFcCt4WS1R0hvtnQybT/IyCkGZ3DpXQ== - find-replace@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/find-replace/-/find-replace-3.0.0.tgz#3e7e23d3b05167a76f770c9fbd5258b0def68c38" @@ -4365,15 +4462,20 @@ flat-cache@^3.0.4: flatted "^3.1.0" rimraf "^3.0.2" +flat@^5.0.2: + version "5.0.2" + resolved "https://registry.yarnpkg.com/flat/-/flat-5.0.2.tgz#8ca6fe332069ffa9d324c327198c598259ceb241" + integrity sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ== + flatbuffers@2.0.4: version "2.0.4" resolved "https://registry.yarnpkg.com/flatbuffers/-/flatbuffers-2.0.4.tgz#034456e29ec480de48bad34f7fc18c03f20c9768" integrity sha512-4rUFVDPjSoP0tOII34oQf+72NKU7E088U5oX7kwICahft0UB2kOQ9wUzzCp+OHxByERIfxRDCgX5mP8Pjkfl0g== flatted@^3.1.0: - version "3.2.5" - resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.2.5.tgz#76c8584f4fc843db64702a6bd04ab7a8bd666da3" - integrity sha512-WIWGi2L3DyTUvUrwRKgGi9TwxQMUEqPOPQBVi71R96jZXJdFskXEmf54BoZaS1kknGODoIGASGEzBUYdyMCBJg== + version "3.2.6" + resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.2.6.tgz#022e9218c637f9f3fc9c35ab9c9193f05add60b2" + integrity sha512-0sQoMh9s0BYsm+12Huy/rkKxVu4R1+r96YX5cG44rHV0pQ6iC3Q+mkoMFaGWObMFYQxCVT+ssG1ksneA2MI9KQ== flush-write-stream@^1.0.2: version "1.1.1" @@ -4402,7 +4504,12 @@ fragment-cache@^0.2.1: dependencies: map-cache "^0.2.2" -fs-extra@^10.0.0: +fs-constants@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/fs-constants/-/fs-constants-1.0.0.tgz#6be0de9be998ce16af8afc24497b9ee9b7ccd9ad" + integrity sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow== + +fs-extra@^10.0.0, fs-extra@^10.1.0: version "10.1.0" resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-10.1.0.tgz#02873cfbc4084dde127eaa5f9905eef2325d1abf" integrity sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ== @@ -4436,7 +4543,7 @@ fs-mkdirp-stream@^1.0.0: graceful-fs "^4.1.11" through2 "^2.0.3" -fs-monkey@1.0.3: +fs-monkey@^1.0.3: version "1.0.3" resolved "https://registry.yarnpkg.com/fs-monkey/-/fs-monkey-1.0.3.tgz#ae3ac92d53bb328efe0e9a1d9541f6ad8d48e2d3" integrity sha512-cybjIfiiE+pTWicSCLFHSrXZ6EilF30oh91FDP9S2B051prEa7QWfrVTQm10/dDpswBDXZugPa1Ogu8Yh+HV0Q== @@ -4483,20 +4590,6 @@ gauge@^4.0.3: strip-ansi "^6.0.1" wide-align "^1.1.5" -gauge@~2.7.3: - version "2.7.4" - resolved "https://registry.yarnpkg.com/gauge/-/gauge-2.7.4.tgz#2c03405c7538c39d7eb37b317022e325fb018bf7" - integrity sha512-14x4kjc6lkD3ltw589k0NrPD6cCNTD6CWoVUNpB85+DrtONoZn+Rug6xZU5RvSC4+TZPxA5AnBibQYAvZn41Hg== - dependencies: - aproba "^1.0.3" - console-control-strings "^1.0.0" - has-unicode "^2.0.0" - object-assign "^4.1.0" - signal-exit "^3.0.0" - string-width "^1.0.1" - strip-ansi "^3.0.1" - wide-align "^1.1.0" - gensync@^1.0.0-beta.2: version "1.0.0-beta.2" resolved "https://registry.yarnpkg.com/gensync/-/gensync-1.0.0-beta.2.tgz#32a6ee76c3d7f52d46b2b1ae5d93fea8580a25e0" @@ -4513,13 +4606,13 @@ 
get-caller-file@^2.0.5: integrity sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg== get-intrinsic@^1.0.2, get-intrinsic@^1.1.1: - version "1.1.1" - resolved "https://registry.yarnpkg.com/get-intrinsic/-/get-intrinsic-1.1.1.tgz#15f59f376f855c446963948f0d24cd3637b4abc6" - integrity sha512-kWZrnVM42QCiEA2Ig1bG8zjoIMOgxWwYCEeNdwY6Tv/cOSeGpcoX4pXHfKUxNKVoArnrEr2e9srnAxxGIraS9Q== + version "1.1.2" + resolved "https://registry.yarnpkg.com/get-intrinsic/-/get-intrinsic-1.1.2.tgz#336975123e05ad0b7ba41f152ee4aadbea6cf598" + integrity sha512-Jfm3OyCxHh9DJyc28qGk+JmfkpO41A4XkneDSujN9MDXrm4oDKdHvndhZ2dN94+ERNfkYJWDclW6k2L/ZGHjXA== dependencies: function-bind "^1.1.1" has "^1.0.3" - has-symbols "^1.0.1" + has-symbols "^1.0.3" get-package-type@^0.1.0: version "0.1.0" @@ -4578,20 +4671,20 @@ git-semver-tags@^4.1.1: meow "^8.0.0" semver "^6.0.0" -git-up@^4.0.0: - version "4.0.5" - resolved "https://registry.yarnpkg.com/git-up/-/git-up-4.0.5.tgz#e7bb70981a37ea2fb8fe049669800a1f9a01d759" - integrity sha512-YUvVDg/vX3d0syBsk/CKUTib0srcQME0JyHkL5BaYdwLsiCslPWmDSi8PUMo9pXYjrryMcmsCoCgsTpSCJEQaA== +git-up@^6.0.0: + version "6.0.0" + resolved "https://registry.yarnpkg.com/git-up/-/git-up-6.0.0.tgz#dbd6e4eee270338be847a0601e6d0763c90b74db" + integrity sha512-6RUFSNd1c/D0xtGnyWN2sxza2bZtZ/EmI9448n6rCZruFwV/ezeEn2fJP7XnUQGwf0RAtd/mmUCbtH6JPYA2SA== dependencies: - is-ssh "^1.3.0" - parse-url "^6.0.0" + is-ssh "^1.4.0" + parse-url "^7.0.2" -git-url-parse@^11.4.4: - version "11.6.0" - resolved "https://registry.yarnpkg.com/git-url-parse/-/git-url-parse-11.6.0.tgz#c634b8de7faa66498a2b88932df31702c67df605" - integrity sha512-WWUxvJs5HsyHL6L08wOusa/IXYtMuCAhrMmnTjQPpBU0TTHyDhnOATNH3xNQz7YOQUsqIIPTGr4xiVti1Hsk5g== +git-url-parse@^12.0.0: + version "12.0.0" + resolved "https://registry.yarnpkg.com/git-url-parse/-/git-url-parse-12.0.0.tgz#4ba70bc1e99138321c57e3765aaf7428e5abb793" + integrity sha512-I6LMWsxV87vysX1WfsoglXsXg6GjQRKq7+Dgiseo+h0skmp5Hp2rzmcEIRQot9CPA+uzU7x1x7jZdqvTFGnB+Q== dependencies: - git-up "^4.0.0" + git-up "^6.0.0" gitconfiglocal@^1.0.0: version "1.0.0" @@ -4608,7 +4701,7 @@ glob-parent@^3.1.0: is-glob "^3.1.0" path-dirname "^1.0.0" -glob-parent@^5.1.1, glob-parent@^5.1.2: +glob-parent@^5.1.1, glob-parent@^5.1.2, glob-parent@~5.1.2: version "5.1.2" resolved "https://registry.yarnpkg.com/glob-parent/-/glob-parent-5.1.2.tgz#869832c58034fe68a4093c17dc15e8340d8401c4" integrity sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow== @@ -4656,7 +4749,19 @@ glob-watcher@^5.0.3: normalize-path "^3.0.0" object.defaults "^1.1.0" -glob@8.0.3, glob@^8.0.1, glob@^8.0.3: +glob@7.1.4: + version "7.1.4" + resolved "https://registry.yarnpkg.com/glob/-/glob-7.1.4.tgz#aa608a2f6c577ad357e1ae5a5c26d9a8d1969255" + integrity sha512-hkLPepehmnKk41pUGm3sYxoFs/umurYfYJCerbXEyFIWcAzvpipAgVkBqqT9RBKMGjnq6kMuyYwha6csxbiM1A== + dependencies: + fs.realpath "^1.0.0" + inflight "^1.0.4" + inherits "2" + minimatch "^3.0.4" + once "^1.3.0" + path-is-absolute "^1.0.0" + +glob@8.0.3, glob@^8.0.1: version "8.0.3" resolved "https://registry.yarnpkg.com/glob/-/glob-8.0.3.tgz#415c6eb2deed9e502c68fa44a272e6da6eeca42e" integrity sha512-ull455NHSHI/Y1FqGaaYFaLGkNMMJbavMrEGFXG/PGrg6y7sutWHUHrz6gy6WEBH6akM1M414dWKCNs+IhKdiQ== @@ -4667,7 +4772,7 @@ glob@8.0.3, glob@^8.0.1, glob@^8.0.3: minimatch "^5.0.1" once "^1.3.0" -glob@^7.1.1, glob@^7.1.3, glob@^7.1.4, glob@^7.1.6: +glob@^7.1.1, glob@^7.1.3, glob@^7.1.4: version "7.2.3" resolved 
"https://registry.yarnpkg.com/glob/-/glob-7.2.3.tgz#b8df0fb802bbfa8e89bd1d938b4e16578ed44f2b" integrity sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q== @@ -4705,13 +4810,13 @@ globals@^11.1.0: integrity sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA== globals@^13.15.0: - version "13.15.0" - resolved "https://registry.yarnpkg.com/globals/-/globals-13.15.0.tgz#38113218c907d2f7e98658af246cef8b77e90bac" - integrity sha512-bpzcOlgDhMG070Av0Vy5Owklpv1I6+j96GhUI7Rh7IzDCKLzboflLrrfqMu8NquDbiR4EOQk7XzJwqVJxicxog== + version "13.17.0" + resolved "https://registry.yarnpkg.com/globals/-/globals-13.17.0.tgz#902eb1e680a41da93945adbdcb5a9f361ba69bd4" + integrity sha512-1C+6nQRb1GwGMKm2dH/E7enFAMxGTmGI7/dEdhy/DNelv85w9B72t3uc5frtMNXIbzrarJJ/lTCjcaZwbLJmyw== dependencies: type-fest "^0.20.2" -globby@^11.0.1, globby@^11.0.2, globby@^11.1.0: +globby@^11.0.2, globby@^11.1.0: version "11.1.0" resolved "https://registry.yarnpkg.com/globby/-/globby-11.1.0.tgz#bd4be98bb042f83d796f7e3811991fbe82a0d34b" integrity sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g== @@ -4723,6 +4828,17 @@ globby@^11.0.1, globby@^11.0.2, globby@^11.1.0: merge2 "^1.4.1" slash "^3.0.0" +globby@^13.1.2: + version "13.1.2" + resolved "https://registry.yarnpkg.com/globby/-/globby-13.1.2.tgz#29047105582427ab6eca4f905200667b056da515" + integrity sha512-LKSDZXToac40u8Q1PQtZihbNdTYSNMuWe+K5l+oa6KgDzSvVrHXlJy40hUP522RjAIoNLJYBJi7ow+rbFpIhHQ== + dependencies: + dir-glob "^3.0.1" + fast-glob "^3.2.11" + ignore "^5.2.0" + merge2 "^1.4.1" + slash "^4.0.0" + glogg@^1.0.0: version "1.0.2" resolved "https://registry.yarnpkg.com/glogg/-/glogg-1.0.2.tgz#2d7dd702beda22eb3bffadf880696da6d846313f" @@ -4730,46 +4846,51 @@ glogg@^1.0.0: dependencies: sparkles "^1.0.0" -google-closure-compiler-java@^20220502.0.0: - version "20220502.0.0" - resolved "https://registry.yarnpkg.com/google-closure-compiler-java/-/google-closure-compiler-java-20220502.0.0.tgz#a92696bfc05489738dc06f797041985bbfb334be" - integrity sha512-XDXw1v+1zcNHuEUXQg24eD9MUF2XTHnEDKCwF0P0zQe+8TWQajKvjsekdJnO6JH/Lqcu8XKc7dxO5+SMijr0sw== +google-closure-compiler-java@^20220719.0.0: + version "20220719.0.0" + resolved "https://registry.yarnpkg.com/google-closure-compiler-java/-/google-closure-compiler-java-20220719.0.0.tgz#a6dc7c7a7e58670d17467c64a77b2fd94418e38e" + integrity sha512-tjWdQSkFqxaFCgzUBaiJj2CxrWUYV0Ij2txp9Um+GyvrzMeX9rqHSUeW4I9cGpOrXkamvWCyAig4Yi0NZXApdg== -google-closure-compiler-linux@^20220502.0.0: - version "20220502.0.0" - resolved "https://registry.yarnpkg.com/google-closure-compiler-linux/-/google-closure-compiler-linux-20220502.0.0.tgz#64a3c1723f102e047433d85ec1dfd1d101a84b4f" - integrity sha512-T+2p/Qj02yGZHxymhj1oZsiHudNvI9sQKfCLoIH0wi0ikDiVIOh/dsH+57lsaGDJ+XTP/ur5Ozl8GIOjv1Efrw== +google-closure-compiler-linux@^20220719.0.0: + version "20220719.0.0" + resolved "https://registry.yarnpkg.com/google-closure-compiler-linux/-/google-closure-compiler-linux-20220719.0.0.tgz#85ebdf3da6aaeee094295d6fc2be23324cf386c9" + integrity sha512-Em8QEAH7RC8T41QgTZC2keO0gsNdQgburXuXoF6gv2ySD/kJvNqrlZCCqLZMZUF6iuCpu3PgnMahdd3IrLpprA== -google-closure-compiler-osx@^20220502.0.0: - version "20220502.0.0" - resolved "https://registry.yarnpkg.com/google-closure-compiler-osx/-/google-closure-compiler-osx-20220502.0.0.tgz#b5e40b2adf737622d435d9bfc99d0912a75f967e" - integrity 
sha512-VwEncD4I1gfkF3zyHlRcUsx2o/poC0qzHjBv+g3Z09wHy9tuqjQ4EP8LmN/GMuV2Hai6gQvkKC0XjYnZTFx2mQ== +google-closure-compiler-osx@^20220719.0.0: + version "20220719.0.0" + resolved "https://registry.yarnpkg.com/google-closure-compiler-osx/-/google-closure-compiler-osx-20220719.0.0.tgz#e97246adffc109a64bd79bcfdf964c78664c66da" + integrity sha512-NwOLgq0ftq0kY1jum6vrafwUMQrCJEpJu6wv5fW/TnYUprPJb1J0T7c4Su8wSm9rdvpqkkqWWMGpfb9RJBuM0g== -google-closure-compiler-windows@^20220502.0.0: - version "20220502.0.0" - resolved "https://registry.yarnpkg.com/google-closure-compiler-windows/-/google-closure-compiler-windows-20220502.0.0.tgz#6c07ebeddd70e138135ae9382b0ced50aea5add6" - integrity sha512-ssdAUS2VZxJAyciVrbhpnYymvm//V4CHyg8aLvMisUfWRDeUSsOCC5mNXy6D8f9i9bYHs3cFV3itIRUfnYCEWg== +google-closure-compiler-windows@^20220719.0.0: + version "20220719.0.0" + resolved "https://registry.yarnpkg.com/google-closure-compiler-windows/-/google-closure-compiler-windows-20220719.0.0.tgz#1177d2d27515d7d0867cdc4e258b32b499a6c005" + integrity sha512-Qi88lkU7a45SzCwdd9CV1D6paiiF7cEpBefkJIaNKi9MBfOZueHZH7Y8/56rdMhJLRjv5VMCGoJY4xH6FcXkvg== -google-closure-compiler@20220502.0.0: - version "20220502.0.0" - resolved "https://registry.yarnpkg.com/google-closure-compiler/-/google-closure-compiler-20220502.0.0.tgz#94d793f60be006236b174f8e1bc3c1a493ed86f1" - integrity sha512-i9Qdve2v3jlerkHzlm00bpYds+kfAlIdeaOQ+acK/pHPHeLjhiXS+EyIpegVnH8+TY3I1QAMZFuVEXkMVJqpBQ== +google-closure-compiler@20220719.0.0: + version "20220719.0.0" + resolved "https://registry.yarnpkg.com/google-closure-compiler/-/google-closure-compiler-20220719.0.0.tgz#234a53e0a09ea04ae04df6bab877e9c833afbd8f" + integrity sha512-0KTxUoX8WBZGeprvZfzp+czdi6wJ5wfJnG4RsIMEPFLR67fW4f+ghh04WSBLBt8kgT64NxaZGESjq23v0dbYNg== dependencies: chalk "2.x" - google-closure-compiler-java "^20220502.0.0" + google-closure-compiler-java "^20220719.0.0" minimist "1.x" vinyl "2.x" vinyl-sourcemaps-apply "^0.2.0" optionalDependencies: - google-closure-compiler-linux "^20220502.0.0" - google-closure-compiler-osx "^20220502.0.0" - google-closure-compiler-windows "^20220502.0.0" + google-closure-compiler-linux "^20220719.0.0" + google-closure-compiler-osx "^20220719.0.0" + google-closure-compiler-windows "^20220719.0.0" -graceful-fs@^4.0.0, graceful-fs@^4.1.11, graceful-fs@^4.1.15, graceful-fs@^4.1.2, graceful-fs@^4.1.6, graceful-fs@^4.2.0, graceful-fs@^4.2.4, graceful-fs@^4.2.6, graceful-fs@^4.2.9: +graceful-fs@^4.0.0, graceful-fs@^4.1.11, graceful-fs@^4.1.15, graceful-fs@^4.1.2, graceful-fs@^4.1.6, graceful-fs@^4.2.0, graceful-fs@^4.2.10, graceful-fs@^4.2.4, graceful-fs@^4.2.6, graceful-fs@^4.2.9: version "4.2.10" resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.10.tgz#147d3a006da4ca3ce14728c7aefc287c367d7a6c" integrity sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA== +grapheme-splitter@^1.0.4: + version "1.0.4" + resolved "https://registry.yarnpkg.com/grapheme-splitter/-/grapheme-splitter-1.0.4.tgz#9cf3a665c6247479896834af35cf1dbb4400767e" + integrity sha512-bzh50DW9kTPM00T8y4o8vQg89Di9oLJVLW/KaOGIXJWP/iqCN6WKYkbNOF04vFLJhwcpYUh9ydh/+5vpOqV4YQ== + gulp-cli@^2.2.0: version "2.3.0" resolved "https://registry.yarnpkg.com/gulp-cli/-/gulp-cli-2.3.0.tgz#ec0d380e29e52aa45e47977f0d32e18fd161122f" @@ -4794,12 +4915,12 @@ gulp-cli@^2.2.0: v8flags "^3.2.0" yargs "^7.1.0" -gulp-esbuild@0.10.3: - version "0.10.3" - resolved "https://registry.yarnpkg.com/gulp-esbuild/-/gulp-esbuild-0.10.3.tgz#cbcc2bf46ceddb2c0f967ab95fdeba8f9ee62b52" - 
integrity sha512-uJ0N2qR+XS1ElobanRUE/ulgGuAuL/bqeE0JgkGuT46KCryzP5rAjV8+nheW7yV+8JEczdJ5dIUGXQaHBpGQjQ== +gulp-esbuild@0.10.4: + version "0.10.4" + resolved "https://registry.yarnpkg.com/gulp-esbuild/-/gulp-esbuild-0.10.4.tgz#c2a68ef1b38a219bc44ac3479889d1837c19c6e1" + integrity sha512-Jj/30Vn0Sq1g4JQC8CqLq8VZJlucO+D9N7F5SidAnVSSIGuz4sOr6c4R5IP7ILeYJn5D1kisd8DXPNRECCFXGQ== dependencies: - esbuild "^0.14.38" + esbuild "^0.14.47" plugin-error "^1.0.1" vinyl "^2.2.1" @@ -4937,12 +5058,12 @@ has-property-descriptors@^1.0.0: dependencies: get-intrinsic "^1.1.1" -has-symbols@^1.0.1: +has-symbols@^1.0.1, has-symbols@^1.0.3: version "1.0.3" resolved "https://registry.yarnpkg.com/has-symbols/-/has-symbols-1.0.3.tgz#bb7b2c4349251dce87b125f7bdf874aa7c8b39f8" integrity sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A== -has-unicode@^2.0.0, has-unicode@^2.0.1: +has-unicode@^2.0.1: version "2.0.1" resolved "https://registry.yarnpkg.com/has-unicode/-/has-unicode-2.0.1.tgz#e0e6fe6a28cf51138855e086d1691e771de2a8b9" integrity sha512-8Rf9Y83NBReMnx0gFzA8JImQACstCYWUplepDa9xprwwtmgEZUF0h/i5xSA625zB/I37EtrswSST6OXxwaaIJQ== @@ -4997,6 +5118,13 @@ hosted-git-info@^2.1.4: resolved "https://registry.yarnpkg.com/hosted-git-info/-/hosted-git-info-2.8.9.tgz#dffc0bf9a21c02209090f2aa69429e1414daf3f9" integrity sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw== +hosted-git-info@^3.0.6: + version "3.0.8" + resolved "https://registry.yarnpkg.com/hosted-git-info/-/hosted-git-info-3.0.8.tgz#6e35d4cc87af2c5f816e4cb9ce350ba87a3f370d" + integrity sha512-aXpmwoOhRBrw6X3j0h5RloK4x1OzsxMPyxqIHyNfSe2pypkVTZFpEiRoSipPEPlMrh0HW/XsjkJ5WgnCirpNUw== + dependencies: + lru-cache "^6.0.0" + hosted-git-info@^4.0.0, hosted-git-info@^4.0.1: version "4.1.0" resolved "https://registry.yarnpkg.com/hosted-git-info/-/hosted-git-info-4.1.0.tgz#827b82867e9ff1c8d0c4d9d53880397d2c86d224" @@ -5021,15 +5149,6 @@ http-cache-semantics@^4.1.0: resolved "https://registry.yarnpkg.com/http-cache-semantics/-/http-cache-semantics-4.1.0.tgz#49e91c5cbf36c9b94bcfcd71c23d5249ec74e390" integrity sha512-carPklcUh7ROWRK7Cv27RPtdhYhUsela/ue5/jKzjegVvXDqM2ILE9Q2BGn9JZJh1g87cp56su/FgQSzcWS8cQ== -http-proxy-agent@^4.0.1: - version "4.0.1" - resolved "https://registry.yarnpkg.com/http-proxy-agent/-/http-proxy-agent-4.0.1.tgz#8a8c8ef7f5932ccf953c296ca8291b95aa74aa3a" - integrity sha512-k0zdNgqWTGA6aeIRVpvfVob4fL52dTfaehylg0Y4UvSySvOq/Y+BOyPrgpUrA7HylqvU8vIZGsRuXmspskV0Tg== - dependencies: - "@tootallnate/once" "1" - agent-base "6" - debug "4" - http-proxy-agent@^5.0.0: version "5.0.0" resolved "https://registry.yarnpkg.com/http-proxy-agent/-/http-proxy-agent-5.0.0.tgz#5129800203520d434f142bc78ff3c170800f2b43" @@ -5073,12 +5192,10 @@ iconv-lite@^0.6.2: dependencies: safer-buffer ">= 2.1.2 < 3.0.0" -ignore-walk@^3.0.3: - version "3.0.4" - resolved "https://registry.yarnpkg.com/ignore-walk/-/ignore-walk-3.0.4.tgz#c9a09f69b7c7b479a5d74ac1a3c0d4236d2a6335" - integrity sha512-PY6Ii8o1jMRA1z4F2hRkH/xN59ox43DavKvD3oDpfurRlOJyAHpifIwpbdv1n4jt4ov0jSpw3kQ4GhJnpBL6WQ== - dependencies: - minimatch "^3.0.4" +ieee754@^1.1.13: + version "1.2.1" + resolved "https://registry.yarnpkg.com/ieee754/-/ieee754-1.2.1.tgz#8eb7a10a63fff25d15a57b001586d177d1b0d352" + integrity sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA== ignore-walk@^5.0.1: version "5.0.1" @@ -5087,7 +5204,7 @@ ignore-walk@^5.0.1: dependencies: minimatch "^5.0.1" -ignore@^5.2.0: +ignore@^5.0.4, 
ignore@^5.2.0: version "5.2.0" resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.2.0.tgz#6d3bac8fa7fe0d45d9f9be7bac2fc279577e345a" integrity sha512-CmxgYGiEPCLhfLnpPp1MoRmifwEIOgjcHXxOBjv7mY96c+eWScsOP9c112ZyLdWHi0FxHjI+4uVhKYp/gcdRmQ== @@ -5146,37 +5263,39 @@ ini@^1.3.2, ini@^1.3.4: resolved "https://registry.yarnpkg.com/ini/-/ini-1.3.8.tgz#a29da425b48806f34767a4efce397269af28432c" integrity sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew== -init-package-json@^2.0.2: - version "2.0.5" - resolved "https://registry.yarnpkg.com/init-package-json/-/init-package-json-2.0.5.tgz#78b85f3c36014db42d8f32117252504f68022646" - integrity sha512-u1uGAtEFu3VA6HNl/yUWw57jmKEMx8SKOxHhxjGnOFUiIlFnohKDFg4ZrPpv9wWqk44nDxGJAtqjdQFm+9XXQA== +init-package-json@^3.0.2: + version "3.0.2" + resolved "https://registry.yarnpkg.com/init-package-json/-/init-package-json-3.0.2.tgz#f5bc9bac93f2bdc005778bc2271be642fecfcd69" + integrity sha512-YhlQPEjNFqlGdzrBfDNRLhvoSgX7iQRgSxgsNknRQ9ITXFT7UMfVMWhBTOh2Y+25lRnGrv5Xz8yZwQ3ACR6T3A== dependencies: - npm-package-arg "^8.1.5" + npm-package-arg "^9.0.1" promzard "^0.3.0" - read "~1.0.1" - read-package-json "^4.1.1" + read "^1.0.7" + read-package-json "^5.0.0" semver "^7.3.5" validate-npm-package-license "^3.0.4" - validate-npm-package-name "^3.0.0" + validate-npm-package-name "^4.0.0" -inquirer@^7.3.3: - version "7.3.3" - resolved "https://registry.yarnpkg.com/inquirer/-/inquirer-7.3.3.tgz#04d176b2af04afc157a83fd7c100e98ee0aad003" - integrity sha512-JG3eIAj5V9CwcGvuOmoo6LB9kbAYT8HXffUl6memuszlwDC/qvFAJw49XJ5NROSFNPxp3iQg1GqkFhaY/CR0IA== +inquirer@^8.2.4: + version "8.2.4" + resolved "https://registry.yarnpkg.com/inquirer/-/inquirer-8.2.4.tgz#ddbfe86ca2f67649a67daa6f1051c128f684f0b4" + integrity sha512-nn4F01dxU8VeKfq192IjLsxu0/OmMZ4Lg3xKAns148rCaXP6ntAoEkVYZThWjwON8AlzdZZi6oqnhNbxUG9hVg== dependencies: ansi-escapes "^4.2.1" - chalk "^4.1.0" + chalk "^4.1.1" cli-cursor "^3.1.0" cli-width "^3.0.0" external-editor "^3.0.3" figures "^3.0.0" - lodash "^4.17.19" + lodash "^4.17.21" mute-stream "0.0.8" + ora "^5.4.1" run-async "^2.4.0" - rxjs "^6.6.0" + rxjs "^7.5.5" string-width "^4.1.0" strip-ansi "^6.0.0" through "^2.3.6" + wrap-ansi "^7.0.0" interpret@^1.4.0: version "1.4.0" @@ -5188,10 +5307,10 @@ invert-kv@^1.0.0: resolved "https://registry.yarnpkg.com/invert-kv/-/invert-kv-1.0.0.tgz#104a8e4aaca6d3d8cd157a8ef8bfab2d7a3ffdb6" integrity sha512-xgs2NH9AE66ucSq4cNG1nhSFghr5l6tdL15Pk+jl46bmmBapgoaY/AacXyaDznAqmGL99TiLSQgO/XazFSKYeQ== -ip@^1.1.5: - version "1.1.8" - resolved "https://registry.yarnpkg.com/ip/-/ip-1.1.8.tgz#ae05948f6b075435ed3307acce04629da8cdbf48" - integrity sha512-PuExPYUiu6qMBQb4l06ecm6T6ujzhmh+MeJcW9wa89PoAz5pvd4zPgN5WJV104mb6S2T1AwNIAaB70JNrLQWhg== +ip@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/ip/-/ip-2.0.0.tgz#4cf4ab182fee2314c75ede1276f8c80b479936da" + integrity sha512-WKa+XuLG1A1R0UWhl2+1XQSi+fZWMsYKffMZTTYsiZaUD8k2yDAj5atimTUD2TZkyCkNEeYE5NhFZmupOGtjYQ== is-absolute@^1.0.0: version "1.0.0" @@ -5227,17 +5346,24 @@ is-binary-path@^1.0.0: dependencies: binary-extensions "^1.0.0" +is-binary-path@~2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/is-binary-path/-/is-binary-path-2.1.0.tgz#ea1f7f3b80f064236e83470f86c09c254fb45b09" + integrity sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw== + dependencies: + binary-extensions "^2.0.0" + is-buffer@^1.1.5: version "1.1.6" resolved 
"https://registry.yarnpkg.com/is-buffer/-/is-buffer-1.1.6.tgz#efaa2ea9daa0d7ab2ea13a97b2b8ad51fefbe8be" integrity sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w== is-builtin-module@^3.1.0: - version "3.1.0" - resolved "https://registry.yarnpkg.com/is-builtin-module/-/is-builtin-module-3.1.0.tgz#6fdb24313b1c03b75f8b9711c0feb8c30b903b00" - integrity sha512-OV7JjAgOTfAFJmHZLvpSTb4qi0nIILDV1gWPYDnDJUTNFM5aGlRAhk4QcT8i7TuAleeEV5Fdkqn3t4mS+Q11fg== + version "3.2.0" + resolved "https://registry.yarnpkg.com/is-builtin-module/-/is-builtin-module-3.2.0.tgz#bb0310dfe881f144ca83f30100ceb10cf58835e0" + integrity sha512-phDA4oSGt7vl1n5tJvTWooWWAsXLY+2xCnxNqvKhGEzujg+A43wPlPOyDg3C8XQHN+6k/JTQWJ/j0dQh/qr+Hw== dependencies: - builtin-modules "^3.0.0" + builtin-modules "^3.3.0" is-ci@^2.0.0: version "2.0.0" @@ -5246,10 +5372,10 @@ is-ci@^2.0.0: dependencies: ci-info "^2.0.0" -is-core-module@^2.5.0, is-core-module@^2.8.1: - version "2.9.0" - resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.9.0.tgz#e1c34429cd51c6dd9e09e0799e396e27b19a9c69" - integrity sha512-+5FPy5PnwmO3lvfMb0AsoPaBG+5KHUI0wYFXOtYPnVVVspTFUuMZNfNaNVRt3FZadstu2c8x23vykRW/NBoU6A== +is-core-module@^2.5.0, is-core-module@^2.8.1, is-core-module@^2.9.0: + version "2.10.0" + resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.10.0.tgz#9012ede0a91c69587e647514e1d5277019e728ed" + integrity sha512-Erxj2n/LDAZ7H8WNJXd9tw38GYM3dv8rk8Zcs+jJuxYTW7sozH+SS8NtrSjVL1/vpLvWi1hxy96IzjJ3EHTJJg== dependencies: has "^1.0.3" @@ -5285,6 +5411,11 @@ is-descriptor@^1.0.0, is-descriptor@^1.0.2: is-data-descriptor "^1.0.0" kind-of "^6.0.2" +is-docker@^2.0.0, is-docker@^2.1.1: + version "2.2.1" + resolved "https://registry.yarnpkg.com/is-docker/-/is-docker-2.2.1.tgz#33eeabe23cfe86f14bde4408a02c0cfb853acdaa" + integrity sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ== + is-extendable@^0.1.0, is-extendable@^0.1.1: version "0.1.1" resolved "https://registry.yarnpkg.com/is-extendable/-/is-extendable-0.1.1.tgz#62b110e289a471418e3ec36a617d472e301dfc89" @@ -5326,13 +5457,18 @@ is-glob@^3.1.0: dependencies: is-extglob "^2.1.0" -is-glob@^4.0.0, is-glob@^4.0.1, is-glob@^4.0.3: +is-glob@^4.0.0, is-glob@^4.0.1, is-glob@^4.0.3, is-glob@~4.0.1: version "4.0.3" resolved "https://registry.yarnpkg.com/is-glob/-/is-glob-4.0.3.tgz#64f61e42cbbb2eec2071a9dac0b28ba1e65d5084" integrity sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg== dependencies: is-extglob "^2.1.1" +is-interactive@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/is-interactive/-/is-interactive-1.0.0.tgz#cea6e6ae5c870a7b0a0004070b7b587e0252912e" + integrity sha512-2HvIEKRoqS62guEC+qBjpvRubdX910WCMuJTZ+I9yvqKU2/12eSL549HMwtabb4oupdj2sMP50k+XJfB/8JE6w== + is-lambda@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/is-lambda/-/is-lambda-1.0.1.tgz#3d9877899e6a53efc0160504cde15f82e6f061d5" @@ -5370,15 +5506,15 @@ is-obj@^2.0.0: resolved "https://registry.yarnpkg.com/is-obj/-/is-obj-2.0.0.tgz#473fb05d973705e3fd9620545018ca8e22ef4982" integrity sha512-drqDG3cbczxxEJRoOXcOjtdp1J/lyp1mNn0xaznRs8+muBhgQcrnbspox5X5fOw0HnMnbfDzvnEMEtqDEJEo8w== -is-path-cwd@^2.2.0: - version "2.2.0" - resolved "https://registry.yarnpkg.com/is-path-cwd/-/is-path-cwd-2.2.0.tgz#67d43b82664a7b5191fd9119127eb300048a9fdb" - integrity sha512-w942bTcih8fdJPJmQHFzkS76NEP8Kzzvmw92cXsazb8intwLqPibPPdXf4ANdKV3rYMuuQYGIWtvz9JilB3NFQ== +is-path-cwd@^3.0.0: 
+ version "3.0.0" + resolved "https://registry.yarnpkg.com/is-path-cwd/-/is-path-cwd-3.0.0.tgz#889b41e55c8588b1eb2a96a61d05740a674521c7" + integrity sha512-kyiNFFLU0Ampr6SDZitD/DwUo4Zs1nSdnygUBqsu3LooL00Qvb5j+UnvApUn/TTj1J3OuE6BTdQ5rudKmU2ZaA== -is-path-inside@^3.0.2: - version "3.0.3" - resolved "https://registry.yarnpkg.com/is-path-inside/-/is-path-inside-3.0.3.tgz#d231362e53a07ff2b0e0ea7fed049161ffd16283" - integrity sha512-Fd4gABb+ycGAmKou8eMftCupSir5lRxqf4aD/vd0cD2qc4HL07OjCeuHMr8Ro4CoMaeCKDB0/ECBOVWjTwUvPQ== +is-path-inside@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/is-path-inside/-/is-path-inside-4.0.0.tgz#805aeb62c47c1b12fc3fd13bfb3ed1e7430071db" + integrity sha512-lJJV/5dYS+RcL8uQdBDW9c9uWFLLBNRyFhnAKXw5tVqLlKZ4RMGZKv+YQ/IA3OhD+RpbJa1LLFM1FQPGyIXvOA== is-plain-obj@^1.0.0, is-plain-obj@^1.1.0: version "1.1.0" @@ -5414,12 +5550,12 @@ is-relative@^1.0.0: dependencies: is-unc-path "^1.0.0" -is-ssh@^1.3.0: - version "1.3.3" - resolved "https://registry.yarnpkg.com/is-ssh/-/is-ssh-1.3.3.tgz#7f133285ccd7f2c2c7fc897b771b53d95a2b2c7e" - integrity sha512-NKzJmQzJfEEma3w5cJNcUMxoXfDjz0Zj0eyCalHn2E6VOwlzjZo0yuO2fcBSf8zhFuVCL/82/r5gRcoi6aEPVQ== +is-ssh@^1.4.0: + version "1.4.0" + resolved "https://registry.yarnpkg.com/is-ssh/-/is-ssh-1.4.0.tgz#4f8220601d2839d8fa624b3106f8e8884f01b8b2" + integrity sha512-x7+VxdxOdlV3CYpjvRLBv5Lo9OJerlYanjwFrPR9fuGPjCiNiCzFgAWpiLAohSbsnH4ZAys3SBh+hq5rJosxUQ== dependencies: - protocols "^1.1.0" + protocols "^2.0.1" is-stream@^2.0.0: version "2.0.1" @@ -5445,6 +5581,11 @@ is-unc-path@^1.0.0: dependencies: unc-path-regex "^0.1.2" +is-unicode-supported@^0.1.0: + version "0.1.0" + resolved "https://registry.yarnpkg.com/is-unicode-supported/-/is-unicode-supported-0.1.0.tgz#3f26c76a809593b52bfa2ecb5710ed2779b522a7" + integrity sha512-knxG2q4UC3u8stRGyAVJCOdxFmv5DZiRcdlIaAQXAbSfJya+OhopNotLQrstBhququ4ZpuKbDc/8S6mgXgPFPw== + is-utf8@^0.2.0, is-utf8@^0.2.1: version "0.2.1" resolved "https://registry.yarnpkg.com/is-utf8/-/is-utf8-0.2.1.tgz#4b0da1442104d1b336340e80797e865cf39f7d72" @@ -5460,6 +5601,13 @@ is-windows@^1.0.1, is-windows@^1.0.2: resolved "https://registry.yarnpkg.com/is-windows/-/is-windows-1.0.2.tgz#d1850eb9791ecd18e6182ce12a30f396634bb19d" integrity sha512-eXK1UInq2bPmjyX6e3VHIzMLobc4J94i4AWn+Hpq3OU5KkrRC96OAcR3PRJ/pGu6m8TRnBHP9dkXQVsT/COVIA== +is-wsl@^2.2.0: + version "2.2.0" + resolved "https://registry.yarnpkg.com/is-wsl/-/is-wsl-2.2.0.tgz#74a4c76e77ca9fd3f932f290c17ea326cd157271" + integrity sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww== + dependencies: + is-docker "^2.0.0" + isarray@1.0.0, isarray@~1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/isarray/-/isarray-1.0.0.tgz#bb935d48582cba168c06834957a54a3e07124f11" @@ -5517,9 +5665,9 @@ istanbul-lib-source-maps@^4.0.0: source-map "^0.6.1" istanbul-reports@^3.1.3: - version "3.1.4" - resolved "https://registry.yarnpkg.com/istanbul-reports/-/istanbul-reports-3.1.4.tgz#1b6f068ecbc6c331040aab5741991273e609e40c" - integrity sha512-r1/DshN4KSE7xWEknZLLLLDn5CJybV3nw01VTkp6D5jzLuELlcbudfj/eSQFvrKsJuTVCGnePO7ho82Nw9zzfw== + version "3.1.5" + resolved "https://registry.yarnpkg.com/istanbul-reports/-/istanbul-reports-3.1.5.tgz#cc9a6ab25cb25659810e4785ed9d9fb742578bae" + integrity sha512-nUsEMa9pBt/NOHqbcbeJEgqIlY/K7rVWUX6Lql2orY5e9roQOthbR3vtY4zzf2orPELg80fnxxk9zUyPlgwD1w== dependencies: html-escaper "^2.0.0" istanbul-lib-report "^3.0.0" @@ -5532,221 +5680,196 @@ istextorbinary@^3.0.0: binaryextensions "^2.2.0" textextensions 
"^3.2.0" -ix@4.5.2: - version "4.5.2" - resolved "https://registry.yarnpkg.com/ix/-/ix-4.5.2.tgz#c91163d38805c5b427902d7cc2ee35ccee0364a5" - integrity sha512-Cm0uEpZd2qU+7DVeyyBQeceafggvPD3xe6LDKUU4YnmOzAFIz0CbxwufRfxywU/5easfCX1XEYcOAkDJUuUtiw== +ix@5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/ix/-/ix-5.0.0.tgz#b9e292f79b1876bbf696809fe86e42930bdbfcd4" + integrity sha512-6LyyrHnvNrSy5pKtW/KA+KKusHrB223aBJCJlIGPN7QBfDkEEtNrAkAz9lLLShIcdJntq6BiPCHuKaCM/9wwXw== dependencies: "@types/node" "^13.7.4" tslib "^2.3.0" -jest-changed-files@^28.0.2: - version "28.0.2" - resolved "https://registry.yarnpkg.com/jest-changed-files/-/jest-changed-files-28.0.2.tgz#7d7810660a5bd043af9e9cfbe4d58adb05e91531" - integrity sha512-QX9u+5I2s54ZnGoMEjiM2WeBvJR2J7w/8ZUmH2um/WLAuGAYFQcsVXY9+1YL6k0H/AGUdH8pXUAv6erDqEsvIA== +jest-changed-files@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest-changed-files/-/jest-changed-files-28.1.3.tgz#d9aeee6792be3686c47cb988a8eaf82ff4238831" + integrity sha512-esaOfUWJXk2nfZt9SPyC8gA1kNfdKLkQWyzsMlqq8msYSlNKfmZxfRgZn4Cd4MGVUF+7v6dBs0d5TOAKa7iIiA== dependencies: execa "^5.0.0" - throat "^6.0.1" + p-limit "^3.1.0" -jest-circus@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/jest-circus/-/jest-circus-28.1.0.tgz#e229f590911bd54d60efaf076f7acd9360296dae" - integrity sha512-rNYfqfLC0L0zQKRKsg4n4J+W1A2fbyGH7Ss/kDIocp9KXD9iaL111glsLu7+Z7FHuZxwzInMDXq+N1ZIBkI/TQ== +jest-circus@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest-circus/-/jest-circus-28.1.3.tgz#d14bd11cf8ee1a03d69902dc47b6bd4634ee00e4" + integrity sha512-cZ+eS5zc79MBwt+IhQhiEp0OeBddpc1n8MBo1nMB8A7oPMKEO+Sre+wHaLJexQUj9Ya/8NOBY0RESUgYjB6fow== dependencies: - "@jest/environment" "^28.1.0" - "@jest/expect" "^28.1.0" - "@jest/test-result" "^28.1.0" - "@jest/types" "^28.1.0" + "@jest/environment" "^28.1.3" + "@jest/expect" "^28.1.3" + "@jest/test-result" "^28.1.3" + "@jest/types" "^28.1.3" "@types/node" "*" chalk "^4.0.0" co "^4.6.0" dedent "^0.7.0" is-generator-fn "^2.0.0" - jest-each "^28.1.0" - jest-matcher-utils "^28.1.0" - jest-message-util "^28.1.0" - jest-runtime "^28.1.0" - jest-snapshot "^28.1.0" - jest-util "^28.1.0" - pretty-format "^28.1.0" + jest-each "^28.1.3" + jest-matcher-utils "^28.1.3" + jest-message-util "^28.1.3" + jest-runtime "^28.1.3" + jest-snapshot "^28.1.3" + jest-util "^28.1.3" + p-limit "^3.1.0" + pretty-format "^28.1.3" slash "^3.0.0" stack-utils "^2.0.3" - throat "^6.0.1" -jest-cli@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/jest-cli/-/jest-cli-28.1.0.tgz#cd1d8adb9630102d5ba04a22895f63decdd7ac1f" - integrity sha512-fDJRt6WPRriHrBsvvgb93OxgajHHsJbk4jZxiPqmZbMDRcHskfJBBfTyjFko0jjfprP544hOktdSi9HVgl4VUQ== +jest-cli@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest-cli/-/jest-cli-28.1.3.tgz#558b33c577d06de55087b8448d373b9f654e46b2" + integrity sha512-roY3kvrv57Azn1yPgdTebPAXvdR2xfezaKKYzVxZ6It/5NCxzJym6tUI5P1zkdWhfUYkxEI9uZWcQdaFLo8mJQ== dependencies: - "@jest/core" "^28.1.0" - "@jest/test-result" "^28.1.0" - "@jest/types" "^28.1.0" + "@jest/core" "^28.1.3" + "@jest/test-result" "^28.1.3" + "@jest/types" "^28.1.3" chalk "^4.0.0" exit "^0.1.2" graceful-fs "^4.2.9" import-local "^3.0.2" - jest-config "^28.1.0" - jest-util "^28.1.0" - jest-validate "^28.1.0" + jest-config "^28.1.3" + jest-util "^28.1.3" + jest-validate "^28.1.3" prompts "^2.0.1" yargs "^17.3.1" -jest-config@^28.1.0: - version "28.1.0" - resolved 
"https://registry.yarnpkg.com/jest-config/-/jest-config-28.1.0.tgz#fca22ca0760e746fe1ce1f9406f6b307ab818501" - integrity sha512-aOV80E9LeWrmflp7hfZNn/zGA4QKv/xsn2w8QCBP0t0+YqObuCWTSgNbHJ0j9YsTuCO08ZR/wsvlxqqHX20iUA== +jest-config@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest-config/-/jest-config-28.1.3.tgz#e315e1f73df3cac31447eed8b8740a477392ec60" + integrity sha512-MG3INjByJ0J4AsNBm7T3hsuxKQqFIiRo/AUqb1q9LRKI5UU6Aar9JHbr9Ivn1TVwfUD9KirRoM/T6u8XlcQPHQ== dependencies: "@babel/core" "^7.11.6" - "@jest/test-sequencer" "^28.1.0" - "@jest/types" "^28.1.0" - babel-jest "^28.1.0" + "@jest/test-sequencer" "^28.1.3" + "@jest/types" "^28.1.3" + babel-jest "^28.1.3" chalk "^4.0.0" ci-info "^3.2.0" deepmerge "^4.2.2" glob "^7.1.3" graceful-fs "^4.2.9" - jest-circus "^28.1.0" - jest-environment-node "^28.1.0" + jest-circus "^28.1.3" + jest-environment-node "^28.1.3" jest-get-type "^28.0.2" jest-regex-util "^28.0.2" - jest-resolve "^28.1.0" - jest-runner "^28.1.0" - jest-util "^28.1.0" - jest-validate "^28.1.0" + jest-resolve "^28.1.3" + jest-runner "^28.1.3" + jest-util "^28.1.3" + jest-validate "^28.1.3" micromatch "^4.0.4" parse-json "^5.2.0" - pretty-format "^28.1.0" + pretty-format "^28.1.3" slash "^3.0.0" strip-json-comments "^3.1.1" -jest-diff@^27.5.1: - version "27.5.1" - resolved "https://registry.yarnpkg.com/jest-diff/-/jest-diff-27.5.1.tgz#a07f5011ac9e6643cf8a95a462b7b1ecf6680def" - integrity sha512-m0NvkX55LDt9T4mctTEgnZk3fmEg3NRYutvMPWM/0iPnkFj2wIeF45O1718cMSOFO1vINkqmxqD8vE37uTEbqw== - dependencies: - chalk "^4.0.0" - diff-sequences "^27.5.1" - jest-get-type "^27.5.1" - pretty-format "^27.5.1" - -jest-diff@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/jest-diff/-/jest-diff-28.1.0.tgz#77686fef899ec1873dbfbf9330e37dd429703269" - integrity sha512-8eFd3U3OkIKRtlasXfiAQfbovgFgRDb0Ngcs2E+FMeBZ4rUezqIaGjuyggJBp+llosQXNEWofk/Sz4Hr5gMUhA== +jest-diff@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest-diff/-/jest-diff-28.1.3.tgz#948a192d86f4e7a64c5264ad4da4877133d8792f" + integrity sha512-8RqP1B/OXzjjTWkqMX67iqgwBVJRgCyKD3L9nq+6ZqJMdvjE8RgHktqZ6jNrkdMT+dJuYNI3rhQpxaz7drJHfw== dependencies: chalk "^4.0.0" - diff-sequences "^28.0.2" + diff-sequences "^28.1.1" jest-get-type "^28.0.2" - pretty-format "^28.1.0" + pretty-format "^28.1.3" -jest-docblock@^28.0.2: - version "28.0.2" - resolved "https://registry.yarnpkg.com/jest-docblock/-/jest-docblock-28.0.2.tgz#3cab8abea53275c9d670cdca814fc89fba1298c2" - integrity sha512-FH10WWw5NxLoeSdQlJwu+MTiv60aXV/t8KEwIRGEv74WARE1cXIqh1vGdy2CraHuWOOrnzTWj/azQKqW4fO7xg== +jest-docblock@^28.1.1: + version "28.1.1" + resolved "https://registry.yarnpkg.com/jest-docblock/-/jest-docblock-28.1.1.tgz#6f515c3bf841516d82ecd57a62eed9204c2f42a8" + integrity sha512-3wayBVNiOYx0cwAbl9rwm5kKFP8yHH3d/fkEaL02NPTkDojPtheGB7HZSFY4wzX+DxyrvhXz0KSCVksmCknCuA== dependencies: detect-newline "^3.0.0" -jest-each@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/jest-each/-/jest-each-28.1.0.tgz#54ae66d6a0a5b1913e9a87588d26c2687c39458b" - integrity sha512-a/XX02xF5NTspceMpHujmOexvJ4GftpYXqr6HhhmKmExtMXsyIN/fvanQlt/BcgFoRKN4OCXxLQKth9/n6OPFg== +jest-each@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest-each/-/jest-each-28.1.3.tgz#bdd1516edbe2b1f3569cfdad9acd543040028f81" + integrity sha512-arT1z4sg2yABU5uogObVPvSlSMQlDA48owx07BDPAiasW0yYpYHYOo4HHLz9q0BVzDVU4hILFjzJw0So9aCL/g== dependencies: - "@jest/types" "^28.1.0" + "@jest/types" "^28.1.3" chalk "^4.0.0" jest-get-type "^28.0.2" 
- jest-util "^28.1.0" - pretty-format "^28.1.0" + jest-util "^28.1.3" + pretty-format "^28.1.3" -jest-environment-node@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/jest-environment-node/-/jest-environment-node-28.1.0.tgz#6ed2150aa31babba0c488c5b4f4d813a585c68e6" - integrity sha512-gBLZNiyrPw9CSMlTXF1yJhaBgWDPVvH0Pq6bOEwGMXaYNzhzhw2kA/OijNF8egbCgDS0/veRv97249x2CX+udQ== +jest-environment-node@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest-environment-node/-/jest-environment-node-28.1.3.tgz#7e74fe40eb645b9d56c0c4b70ca4357faa349be5" + integrity sha512-ugP6XOhEpjAEhGYvp5Xj989ns5cB1K6ZdjBYuS30umT4CQEETaxSiPcZ/E1kFktX4GkrcM4qu07IIlDYX1gp+A== dependencies: - "@jest/environment" "^28.1.0" - "@jest/fake-timers" "^28.1.0" - "@jest/types" "^28.1.0" + "@jest/environment" "^28.1.3" + "@jest/fake-timers" "^28.1.3" + "@jest/types" "^28.1.3" "@types/node" "*" - jest-mock "^28.1.0" - jest-util "^28.1.0" - -jest-get-type@^27.5.1: - version "27.5.1" - resolved "https://registry.yarnpkg.com/jest-get-type/-/jest-get-type-27.5.1.tgz#3cd613c507b0f7ace013df407a1c1cd578bcb4f1" - integrity sha512-2KY95ksYSaK7DMBWQn6dQz3kqAf3BB64y2udeG+hv4KfSOb9qwcYQstTJc1KCbsix+wLZWZYN8t7nwX3GOBLRw== + jest-mock "^28.1.3" + jest-util "^28.1.3" jest-get-type@^28.0.2: version "28.0.2" resolved "https://registry.yarnpkg.com/jest-get-type/-/jest-get-type-28.0.2.tgz#34622e628e4fdcd793d46db8a242227901fcf203" integrity sha512-ioj2w9/DxSYHfOm5lJKCdcAmPJzQXmbM/Url3rhlghrPvT3tt+7a/+oXc9azkKmLvoiXjtV83bEWqi+vs5nlPA== -jest-haste-map@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/jest-haste-map/-/jest-haste-map-28.1.0.tgz#6c1ee2daf1c20a3e03dbd8e5b35c4d73d2349cf0" - integrity sha512-xyZ9sXV8PtKi6NCrJlmq53PyNVHzxmcfXNVvIRHpHmh1j/HChC4pwKgyjj7Z9us19JMw8PpQTJsFWOsIfT93Dw== +jest-haste-map@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest-haste-map/-/jest-haste-map-28.1.3.tgz#abd5451129a38d9841049644f34b034308944e2b" + integrity sha512-3S+RQWDXccXDKSWnkHa/dPwt+2qwA8CJzR61w3FoYCvoo3Pn8tvGcysmMF0Bj0EX5RYvAI2EIvC57OmotfdtKA== dependencies: - "@jest/types" "^28.1.0" + "@jest/types" "^28.1.3" "@types/graceful-fs" "^4.1.3" "@types/node" "*" anymatch "^3.0.3" fb-watchman "^2.0.0" graceful-fs "^4.2.9" jest-regex-util "^28.0.2" - jest-util "^28.1.0" - jest-worker "^28.1.0" + jest-util "^28.1.3" + jest-worker "^28.1.3" micromatch "^4.0.4" - walker "^1.0.7" + walker "^1.0.8" optionalDependencies: fsevents "^2.3.2" -jest-leak-detector@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/jest-leak-detector/-/jest-leak-detector-28.1.0.tgz#b65167776a8787443214d6f3f54935a4c73c8a45" - integrity sha512-uIJDQbxwEL2AMMs2xjhZl2hw8s77c3wrPaQ9v6tXJLGaaQ+4QrNJH5vuw7hA7w/uGT/iJ42a83opAqxGHeyRIA== +jest-leak-detector@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest-leak-detector/-/jest-leak-detector-28.1.3.tgz#a6685d9b074be99e3adee816ce84fd30795e654d" + integrity sha512-WFVJhnQsiKtDEo5lG2mM0v40QWnBM+zMdHHyJs8AWZ7J0QZJS59MsyKeJHWhpBZBH32S48FOVvGyOFT1h0DlqA== dependencies: jest-get-type "^28.0.2" - pretty-format "^28.1.0" - -jest-matcher-utils@^27.0.0: - version "27.5.1" - resolved "https://registry.yarnpkg.com/jest-matcher-utils/-/jest-matcher-utils-27.5.1.tgz#9c0cdbda8245bc22d2331729d1091308b40cf8ab" - integrity sha512-z2uTx/T6LBaCoNWNFWwChLBKYxTMcGBRjAt+2SbP929/Fflb9aa5LGma654Rz8z9HLxsrUaYzxE9T/EFIL/PAw== - dependencies: - chalk "^4.0.0" - jest-diff "^27.5.1" - jest-get-type "^27.5.1" - pretty-format "^27.5.1" + pretty-format "^28.1.3" 
-jest-matcher-utils@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/jest-matcher-utils/-/jest-matcher-utils-28.1.0.tgz#2ae398806668eeabd293c61712227cb94b250ccf" - integrity sha512-onnax0n2uTLRQFKAjC7TuaxibrPSvZgKTcSCnNUz/tOjJ9UhxNm7ZmPpoQavmTDUjXvUQ8KesWk2/VdrxIFzTQ== +jest-matcher-utils@^28.0.0, jest-matcher-utils@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest-matcher-utils/-/jest-matcher-utils-28.1.3.tgz#5a77f1c129dd5ba3b4d7fc20728806c78893146e" + integrity sha512-kQeJ7qHemKfbzKoGjHHrRKH6atgxMk8Enkk2iPQ3XwO6oE/KYD8lMYOziCkeSB9G4adPM4nR1DE8Tf5JeWH6Bw== dependencies: chalk "^4.0.0" - jest-diff "^28.1.0" + jest-diff "^28.1.3" jest-get-type "^28.0.2" - pretty-format "^28.1.0" + pretty-format "^28.1.3" -jest-message-util@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/jest-message-util/-/jest-message-util-28.1.0.tgz#7e8f0b9049e948e7b94c2a52731166774ba7d0af" - integrity sha512-RpA8mpaJ/B2HphDMiDlrAZdDytkmwFqgjDZovM21F35lHGeUeCvYmm6W+sbQ0ydaLpg5bFAUuWG1cjqOl8vqrw== +jest-message-util@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest-message-util/-/jest-message-util-28.1.3.tgz#232def7f2e333f1eecc90649b5b94b0055e7c43d" + integrity sha512-PFdn9Iewbt575zKPf1286Ht9EPoJmYT7P0kY+RibeYZ2XtOr53pDLEFoTWXbd1h4JiGiWpTBC84fc8xMXQMb7g== dependencies: "@babel/code-frame" "^7.12.13" - "@jest/types" "^28.1.0" + "@jest/types" "^28.1.3" "@types/stack-utils" "^2.0.0" chalk "^4.0.0" graceful-fs "^4.2.9" micromatch "^4.0.4" - pretty-format "^28.1.0" + pretty-format "^28.1.3" slash "^3.0.0" stack-utils "^2.0.3" -jest-mock@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/jest-mock/-/jest-mock-28.1.0.tgz#ccc7cc12a9b330b3182db0c651edc90d163ff73e" - integrity sha512-H7BrhggNn77WhdL7O1apG0Q/iwl0Bdd5E1ydhCJzL3oBLh/UYxAwR3EJLsBZ9XA3ZU4PA3UNw4tQjduBTCTmLw== +jest-mock@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest-mock/-/jest-mock-28.1.3.tgz#d4e9b1fc838bea595c77ab73672ebf513ab249da" + integrity sha512-o3J2jr6dMMWYVH4Lh/NKmDXdosrsJgi4AviS8oXLujcjpCMBb1FMsblDnOXKZKfSiHLxYub1eS0IHuRXsio9eA== dependencies: - "@jest/types" "^28.1.0" + "@jest/types" "^28.1.3" "@types/node" "*" jest-pnp-resolver@^1.2.2: @@ -5759,81 +5882,81 @@ jest-regex-util@^28.0.2: resolved "https://registry.yarnpkg.com/jest-regex-util/-/jest-regex-util-28.0.2.tgz#afdc377a3b25fb6e80825adcf76c854e5bf47ead" integrity sha512-4s0IgyNIy0y9FK+cjoVYoxamT7Zeo7MhzqRGx7YDYmaQn1wucY9rotiGkBzzcMXTtjrCAP/f7f+E0F7+fxPNdw== -jest-resolve-dependencies@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/jest-resolve-dependencies/-/jest-resolve-dependencies-28.1.0.tgz#167becb8bee6e20b5ef4a3a728ec67aef6b0b79b" - integrity sha512-Ue1VYoSZquPwEvng7Uefw8RmZR+me/1kr30H2jMINjGeHgeO/JgrR6wxj2ofkJ7KSAA11W3cOrhNCbj5Dqqd9g== +jest-resolve-dependencies@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest-resolve-dependencies/-/jest-resolve-dependencies-28.1.3.tgz#8c65d7583460df7275c6ea2791901fa975c1fe66" + integrity sha512-qa0QO2Q0XzQoNPouMbCc7Bvtsem8eQgVPNkwn9LnS+R2n8DaVDPL/U1gngC0LTl1RYXJU0uJa2BMC2DbTfFrHA== dependencies: jest-regex-util "^28.0.2" - jest-snapshot "^28.1.0" + jest-snapshot "^28.1.3" -jest-resolve@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/jest-resolve/-/jest-resolve-28.1.0.tgz#b1f32748a6cee7d1779c7ef639c0a87078de3d35" - integrity sha512-vvfN7+tPNnnhDvISuzD1P+CRVP8cK0FHXRwPAcdDaQv4zgvwvag2n55/h5VjYcM5UJG7L4TwE5tZlzcI0X2Lhw== +jest-resolve@^28.1.3: + version "28.1.3" + 
resolved "https://registry.yarnpkg.com/jest-resolve/-/jest-resolve-28.1.3.tgz#cfb36100341ddbb061ec781426b3c31eb51aa0a8" + integrity sha512-Z1W3tTjE6QaNI90qo/BJpfnvpxtaFTFw5CDgwpyE/Kz8U/06N1Hjf4ia9quUhCh39qIGWF1ZuxFiBiJQwSEYKQ== dependencies: chalk "^4.0.0" graceful-fs "^4.2.9" - jest-haste-map "^28.1.0" + jest-haste-map "^28.1.3" jest-pnp-resolver "^1.2.2" - jest-util "^28.1.0" - jest-validate "^28.1.0" + jest-util "^28.1.3" + jest-validate "^28.1.3" resolve "^1.20.0" resolve.exports "^1.1.0" slash "^3.0.0" -jest-runner@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/jest-runner/-/jest-runner-28.1.0.tgz#aefe2a1e618a69baa0b24a50edc54fdd7e728eaa" - integrity sha512-FBpmuh1HB2dsLklAlRdOxNTTHKFR6G1Qmd80pVDvwbZXTriqjWqjei5DKFC1UlM732KjYcE6yuCdiF0WUCOS2w== +jest-runner@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest-runner/-/jest-runner-28.1.3.tgz#5eee25febd730b4713a2cdfd76bdd5557840f9a1" + integrity sha512-GkMw4D/0USd62OVO0oEgjn23TM+YJa2U2Wu5zz9xsQB1MxWKDOlrnykPxnMsN0tnJllfLPinHTka61u0QhaxBA== dependencies: - "@jest/console" "^28.1.0" - "@jest/environment" "^28.1.0" - "@jest/test-result" "^28.1.0" - "@jest/transform" "^28.1.0" - "@jest/types" "^28.1.0" + "@jest/console" "^28.1.3" + "@jest/environment" "^28.1.3" + "@jest/test-result" "^28.1.3" + "@jest/transform" "^28.1.3" + "@jest/types" "^28.1.3" "@types/node" "*" chalk "^4.0.0" emittery "^0.10.2" graceful-fs "^4.2.9" - jest-docblock "^28.0.2" - jest-environment-node "^28.1.0" - jest-haste-map "^28.1.0" - jest-leak-detector "^28.1.0" - jest-message-util "^28.1.0" - jest-resolve "^28.1.0" - jest-runtime "^28.1.0" - jest-util "^28.1.0" - jest-watcher "^28.1.0" - jest-worker "^28.1.0" + jest-docblock "^28.1.1" + jest-environment-node "^28.1.3" + jest-haste-map "^28.1.3" + jest-leak-detector "^28.1.3" + jest-message-util "^28.1.3" + jest-resolve "^28.1.3" + jest-runtime "^28.1.3" + jest-util "^28.1.3" + jest-watcher "^28.1.3" + jest-worker "^28.1.3" + p-limit "^3.1.0" source-map-support "0.5.13" - throat "^6.0.1" - -jest-runtime@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/jest-runtime/-/jest-runtime-28.1.0.tgz#4847dcb2a4eb4b0f9eaf41306897e51fb1665631" - integrity sha512-wNYDiwhdH/TV3agaIyVF0lsJ33MhyujOe+lNTUiolqKt8pchy1Hq4+tDMGbtD5P/oNLA3zYrpx73T9dMTOCAcg== - dependencies: - "@jest/environment" "^28.1.0" - "@jest/fake-timers" "^28.1.0" - "@jest/globals" "^28.1.0" - "@jest/source-map" "^28.0.2" - "@jest/test-result" "^28.1.0" - "@jest/transform" "^28.1.0" - "@jest/types" "^28.1.0" + +jest-runtime@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest-runtime/-/jest-runtime-28.1.3.tgz#a57643458235aa53e8ec7821949e728960d0605f" + integrity sha512-NU+881ScBQQLc1JHG5eJGU7Ui3kLKrmwCPPtYsJtBykixrM2OhVQlpMmFWJjMyDfdkGgBMNjXCGB/ebzsgNGQw== + dependencies: + "@jest/environment" "^28.1.3" + "@jest/fake-timers" "^28.1.3" + "@jest/globals" "^28.1.3" + "@jest/source-map" "^28.1.2" + "@jest/test-result" "^28.1.3" + "@jest/transform" "^28.1.3" + "@jest/types" "^28.1.3" chalk "^4.0.0" cjs-module-lexer "^1.0.0" collect-v8-coverage "^1.0.0" execa "^5.0.0" glob "^7.1.3" graceful-fs "^4.2.9" - jest-haste-map "^28.1.0" - jest-message-util "^28.1.0" - jest-mock "^28.1.0" + jest-haste-map "^28.1.3" + jest-message-util "^28.1.3" + jest-mock "^28.1.3" jest-regex-util "^28.0.2" - jest-resolve "^28.1.0" - jest-snapshot "^28.1.0" - jest-util "^28.1.0" + jest-resolve "^28.1.3" + jest-snapshot "^28.1.3" + jest-util "^28.1.3" slash "^3.0.0" strip-bom "^4.0.0" @@ -5845,33 +5968,33 @@ 
jest-silent-reporter@0.5.0: chalk "^4.0.0" jest-util "^26.0.0" -jest-snapshot@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/jest-snapshot/-/jest-snapshot-28.1.0.tgz#4b74fa8816707dd10fe9d551c2c258e5a67b53b6" - integrity sha512-ex49M2ZrZsUyQLpLGxQtDbahvgBjlLPgklkqGM0hq/F7W/f8DyqZxVHjdy19QKBm4O93eDp+H5S23EiTbbUmHw== +jest-snapshot@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest-snapshot/-/jest-snapshot-28.1.3.tgz#17467b3ab8ddb81e2f605db05583d69388fc0668" + integrity sha512-4lzMgtiNlc3DU/8lZfmqxN3AYD6GGLbl+72rdBpXvcV+whX7mDrREzkPdp2RnmfIiWBg1YbuFSkXduF2JcafJg== dependencies: "@babel/core" "^7.11.6" "@babel/generator" "^7.7.2" "@babel/plugin-syntax-typescript" "^7.7.2" "@babel/traverse" "^7.7.2" "@babel/types" "^7.3.3" - "@jest/expect-utils" "^28.1.0" - "@jest/transform" "^28.1.0" - "@jest/types" "^28.1.0" + "@jest/expect-utils" "^28.1.3" + "@jest/transform" "^28.1.3" + "@jest/types" "^28.1.3" "@types/babel__traverse" "^7.0.6" "@types/prettier" "^2.1.5" babel-preset-current-node-syntax "^1.0.0" chalk "^4.0.0" - expect "^28.1.0" + expect "^28.1.3" graceful-fs "^4.2.9" - jest-diff "^28.1.0" + jest-diff "^28.1.3" jest-get-type "^28.0.2" - jest-haste-map "^28.1.0" - jest-matcher-utils "^28.1.0" - jest-message-util "^28.1.0" - jest-util "^28.1.0" + jest-haste-map "^28.1.3" + jest-matcher-utils "^28.1.3" + jest-message-util "^28.1.3" + jest-util "^28.1.3" natural-compare "^1.4.0" - pretty-format "^28.1.0" + pretty-format "^28.1.3" semver "^7.3.5" jest-util@^26.0.0: @@ -5886,42 +6009,42 @@ jest-util@^26.0.0: is-ci "^2.0.0" micromatch "^4.0.2" -jest-util@^28.0.0, jest-util@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-28.1.0.tgz#d54eb83ad77e1dd441408738c5a5043642823be5" - integrity sha512-qYdCKD77k4Hwkose2YBEqQk7PzUf/NSE+rutzceduFveQREeH6b+89Dc9+wjX9dAwHcgdx4yedGA3FQlU/qCTA== +jest-util@^28.0.0, jest-util@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-28.1.3.tgz#f4f932aa0074f0679943220ff9cbba7e497028b0" + integrity sha512-XdqfpHwpcSRko/C35uLYFM2emRAltIIKZiJ9eAmhjsj0CqZMa0p1ib0R5fWIqGhn1a103DebTbpqIaP1qCQ6tQ== dependencies: - "@jest/types" "^28.1.0" + "@jest/types" "^28.1.3" "@types/node" "*" chalk "^4.0.0" ci-info "^3.2.0" graceful-fs "^4.2.9" picomatch "^2.2.3" -jest-validate@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/jest-validate/-/jest-validate-28.1.0.tgz#8a6821f48432aba9f830c26e28226ad77b9a0e18" - integrity sha512-Lly7CJYih3vQBfjLeANGgBSBJ7pEa18cxpQfQEq2go2xyEzehnHfQTjoUia8xUv4x4J80XKFIDwJJThXtRFQXQ== +jest-validate@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest-validate/-/jest-validate-28.1.3.tgz#e322267fd5e7c64cea4629612c357bbda96229df" + integrity sha512-SZbOGBWEsaTxBGCOpsRWlXlvNkvTkY0XxRfh7zYmvd8uL5Qzyg0CHAXiXKROflh801quA6+/DsT4ODDthOC/OA== dependencies: - "@jest/types" "^28.1.0" + "@jest/types" "^28.1.3" camelcase "^6.2.0" chalk "^4.0.0" jest-get-type "^28.0.2" leven "^3.1.0" - pretty-format "^28.1.0" + pretty-format "^28.1.3" -jest-watcher@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/jest-watcher/-/jest-watcher-28.1.0.tgz#aaa7b4164a4e77eeb5f7d7b25ede5e7b4e9c9aaf" - integrity sha512-tNHMtfLE8Njcr2IRS+5rXYA4BhU90gAOwI9frTGOqd+jX0P/Au/JfRSNqsf5nUTcWdbVYuLxS1KjnzILSoR5hA== +jest-watcher@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest-watcher/-/jest-watcher-28.1.3.tgz#c6023a59ba2255e3b4c57179fc94164b3e73abd4" + integrity 
sha512-t4qcqj9hze+jviFPUN3YAtAEeFnr/azITXQEMARf5cMwKY2SMBRnCQTXLixTl20OR6mLh9KLMrgVJgJISym+1g== dependencies: - "@jest/test-result" "^28.1.0" - "@jest/types" "^28.1.0" + "@jest/test-result" "^28.1.3" + "@jest/types" "^28.1.3" "@types/node" "*" ansi-escapes "^4.2.1" chalk "^4.0.0" emittery "^0.10.2" - jest-util "^28.1.0" + jest-util "^28.1.3" string-length "^4.0.1" jest-worker@^27.4.5: @@ -5933,29 +6056,37 @@ jest-worker@^27.4.5: merge-stream "^2.0.0" supports-color "^8.0.0" -jest-worker@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/jest-worker/-/jest-worker-28.1.0.tgz#ced54757a035e87591e1208253a6e3aac1a855e5" - integrity sha512-ZHwM6mNwaWBR52Snff8ZvsCTqQsvhCxP/bT1I6T6DAnb6ygkshsyLQIMxFwHpYxht0HOoqt23JlC01viI7T03A== +jest-worker@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest-worker/-/jest-worker-28.1.3.tgz#7e3c4ce3fa23d1bb6accb169e7f396f98ed4bb98" + integrity sha512-CqRA220YV/6jCo8VWvAt1KKx6eek1VIHMPeLEbpcfSfkEeWyBNppynM/o6q+Wmw+sOhos2ml34wZbSX3G13//g== dependencies: "@types/node" "*" merge-stream "^2.0.0" supports-color "^8.0.0" -jest@28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/jest/-/jest-28.1.0.tgz#f420e41c8f2395b9a30445a97189ebb57593d831" - integrity sha512-TZR+tHxopPhzw3c3560IJXZWLNHgpcz1Zh0w5A65vynLGNcg/5pZ+VildAd7+XGOu6jd58XMY/HNn0IkZIXVXg== +jest@28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/jest/-/jest-28.1.3.tgz#e9c6a7eecdebe3548ca2b18894a50f45b36dfc6b" + integrity sha512-N4GT5on8UkZgH0O5LUavMRV1EDEhNTL0KEfRmDIeZHSV7p2XgLoY9t9VDUgL6o+yfdgYHVxuz81G8oB9VG5uyA== dependencies: - "@jest/core" "^28.1.0" + "@jest/core" "^28.1.3" + "@jest/types" "^28.1.3" import-local "^3.0.2" - jest-cli "^28.1.0" + jest-cli "^28.1.3" js-tokens@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-4.0.0.tgz#19203fb59991df98e3a287050d4647cdeaf32499" integrity sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ== +js-yaml@4.1.0, js-yaml@^4.1.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/js-yaml/-/js-yaml-4.1.0.tgz#c1fb65f8f5017901cdd2c951864ba18458a10602" + integrity sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA== + dependencies: + argparse "^2.0.1" + js-yaml@^3.13.1: version "3.14.1" resolved "https://registry.yarnpkg.com/js-yaml/-/js-yaml-3.14.1.tgz#dae812fdb3825fa306609a8717383c50c36a0537" @@ -5964,13 +6095,6 @@ js-yaml@^3.13.1: argparse "^1.0.7" esprima "^4.0.0" -js-yaml@^4.1.0: - version "4.1.0" - resolved "https://registry.yarnpkg.com/js-yaml/-/js-yaml-4.1.0.tgz#c1fb65f8f5017901cdd2c951864ba18458a10602" - integrity sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA== - dependencies: - argparse "^2.0.1" - jsesc@^2.5.1: version "2.5.2" resolved "https://registry.yarnpkg.com/jsesc/-/jsesc-2.5.2.tgz#80564d2e483dacf6e8ef209650a67df3f0c283a4" @@ -6020,16 +6144,28 @@ json2csv@^5.0.6: jsonparse "^1.3.1" lodash.get "^4.4.2" +json5@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/json5/-/json5-1.0.1.tgz#779fb0018604fa854eacbf6252180d83543e3dbe" + integrity sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow== + dependencies: + minimist "^1.2.0" + json5@^2.2.1: version "2.2.1" resolved "https://registry.yarnpkg.com/json5/-/json5-2.2.1.tgz#655d50ed1e6f95ad1a3caababd2b0efda10b395c" integrity 
sha512-1hqLFMSrGHRHxav9q9gNjJ5EXznIxGVO09xQRrwplcS8qs28pZ8s8hupZAmqDwZUmVZ2Qb2jnyPOWcDH8m8dlA== -jsonc-parser@^3.0.0: +jsonc-parser@3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/jsonc-parser/-/jsonc-parser-3.0.0.tgz#abdd785701c7e7eaca8a9ec8cf070ca51a745a22" integrity sha512-fQzRfAbIBnR0IQvftw9FJveWiHp72Fg20giDrHz6TdfB12UH/uue0D3hm57UB5KgAVuniLMCaS8P1IMj9NR7cA== +jsonc-parser@^3.0.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/jsonc-parser/-/jsonc-parser-3.1.0.tgz#73b8f0e5c940b83d03476bc2e51a20ef0932615d" + integrity sha512-DRf0QjnNeCUds3xTjKlQQ3DpJD51GvDjJfnxUVWg6PZTo2otSm+slzNAxU/35hF8/oJIKoG9slq30JYOsF2azg== + jsonfile@^6.0.1: version "6.1.0" resolved "https://registry.yarnpkg.com/jsonfile/-/jsonfile-6.1.0.tgz#bc55b2634793c679ec6403094eb13698a6ec0aae" @@ -6055,9 +6191,9 @@ just-diff-apply@^5.2.0: integrity sha512-dgFenZnMsc1xGNqgdtgnh7DK+Oy352CE3VZLbzcbQpsBs9iI2K3M0IRrdgREZ72eItTjbl0suRyvKRdVQa9GbA== just-diff@^5.0.1: - version "5.0.2" - resolved "https://registry.yarnpkg.com/just-diff/-/just-diff-5.0.2.tgz#68854c94280c37d28cb266d8f29bdd2cd29f003e" - integrity sha512-uGd6F+eIZ4T95EinP8ubINGkbEy3jrgBym+6LjW+ja1UG1WQIcEcQ6FLeyXtVJZglk+bj7fvEn+Cu2LBxkgiYQ== + version "5.0.3" + resolved "https://registry.yarnpkg.com/just-diff/-/just-diff-5.0.3.tgz#4c9c514dec5526b25ab977590e3c39a0cf271554" + integrity sha512-a8p80xcpJ6sdurk5PxDKb4mav9MeKjA3zFKZpCWBIfvg8mznfnmb13MKZvlrwJ+Lhis0wM3uGAzE0ArhFHvIcg== kind-of@^3.0.2, kind-of@^3.0.3, kind-of@^3.2.0: version "3.2.2" @@ -6089,9 +6225,9 @@ kleur@^3.0.3: integrity sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w== kleur@^4.1.4: - version "4.1.4" - resolved "https://registry.yarnpkg.com/kleur/-/kleur-4.1.4.tgz#8c202987d7e577766d039a8cd461934c01cda04d" - integrity sha512-8QADVssbrFjivHWQU7KkMgptGTl6WAcSdlbBPY4uNF+mWr6DGcKrvY2w4FQJoXch7+fKMjj0dRrL75vk3k23OA== + version "4.1.5" + resolved "https://registry.yarnpkg.com/kleur/-/kleur-4.1.5.tgz#95106101795f7050c6c650f350c683febddb1780" + integrity sha512-o+NO+8WrRiQEE4/7nwRJhN1HWpVmJm511pBHUxPLtp0BUISzlBplORYSmTclCnJvQq2tKu/sgl3xVpkc7ZWuQQ== last-run@^1.1.0: version "1.1.1" @@ -6122,29 +6258,30 @@ lead@^1.0.0: dependencies: flush-write-stream "^1.0.2" -lerna@5.0.0: - version "5.0.0" - resolved "https://registry.yarnpkg.com/lerna/-/lerna-5.0.0.tgz#077e35d41fcead5ea223af1862dc25475e1aaf2a" - integrity sha512-dUYmJ7H9k/xHtwKpQWLTNUa1jnFUiW4o4K2LFkRchlIijoIUT4yK/RprIxNvYCrLrEaOdZryvY5UZvSHI2tBxA== - dependencies: - "@lerna/add" "5.0.0" - "@lerna/bootstrap" "5.0.0" - "@lerna/changed" "5.0.0" - "@lerna/clean" "5.0.0" - "@lerna/cli" "5.0.0" - "@lerna/create" "5.0.0" - "@lerna/diff" "5.0.0" - "@lerna/exec" "5.0.0" - "@lerna/import" "5.0.0" - "@lerna/info" "5.0.0" - "@lerna/init" "5.0.0" - "@lerna/link" "5.0.0" - "@lerna/list" "5.0.0" - "@lerna/publish" "5.0.0" - "@lerna/run" "5.0.0" - "@lerna/version" "5.0.0" +lerna@5.3.0: + version "5.3.0" + resolved "https://registry.yarnpkg.com/lerna/-/lerna-5.3.0.tgz#6e529b2cbe3d103c5b0a2f4152888b8d84501b67" + integrity sha512-0Y9xJqleVu0ExGmsw2WM/GkVmxOwtA7OLQFS5ERPKJfnsxH9roTX3a7NPaGQRI2E+tSJLJJGgNSf3WYEqinOqA== + dependencies: + "@lerna/add" "5.3.0" + "@lerna/bootstrap" "5.3.0" + "@lerna/changed" "5.3.0" + "@lerna/clean" "5.3.0" + "@lerna/cli" "5.3.0" + "@lerna/create" "5.3.0" + "@lerna/diff" "5.3.0" + "@lerna/exec" "5.3.0" + "@lerna/import" "5.3.0" + "@lerna/info" "5.3.0" + "@lerna/init" "5.3.0" + "@lerna/link" "5.3.0" + "@lerna/list" "5.3.0" + "@lerna/publish" "5.3.0" + "@lerna/run" "5.3.0" + 
"@lerna/version" "5.3.0" import-local "^3.0.2" - npmlog "^4.1.2" + npmlog "^6.0.2" + nx ">=14.4.3 < 16" leven@^3.1.0: version "3.1.0" @@ -6159,26 +6296,26 @@ levn@^0.4.1: prelude-ls "^1.2.1" type-check "~0.4.0" -libnpmaccess@^4.0.1: - version "4.0.3" - resolved "https://registry.yarnpkg.com/libnpmaccess/-/libnpmaccess-4.0.3.tgz#dfb0e5b0a53c315a2610d300e46b4ddeb66e7eec" - integrity sha512-sPeTSNImksm8O2b6/pf3ikv4N567ERYEpeKRPSmqlNt1dTZbvgpJIzg5vAhXHpw2ISBsELFRelk0jEahj1c6nQ== +libnpmaccess@^6.0.3: + version "6.0.3" + resolved "https://registry.yarnpkg.com/libnpmaccess/-/libnpmaccess-6.0.3.tgz#473cc3e4aadb2bc713419d92e45d23b070d8cded" + integrity sha512-4tkfUZprwvih2VUZYMozL7EMKgQ5q9VW2NtRyxWtQWlkLTAWHRklcAvBN49CVqEkhUw7vTX2fNgB5LzgUucgYg== dependencies: aproba "^2.0.0" minipass "^3.1.1" - npm-package-arg "^8.1.2" - npm-registry-fetch "^11.0.0" + npm-package-arg "^9.0.1" + npm-registry-fetch "^13.0.0" -libnpmpublish@^4.0.0: - version "4.0.2" - resolved "https://registry.yarnpkg.com/libnpmpublish/-/libnpmpublish-4.0.2.tgz#be77e8bf5956131bcb45e3caa6b96a842dec0794" - integrity sha512-+AD7A2zbVeGRCFI2aO//oUmapCwy7GHqPXFJh3qpToSRNU+tXKJ2YFUgjt04LPPAf2dlEH95s6EhIHM1J7bmOw== +libnpmpublish@^6.0.4: + version "6.0.4" + resolved "https://registry.yarnpkg.com/libnpmpublish/-/libnpmpublish-6.0.4.tgz#adb41ec6b0c307d6f603746a4d929dcefb8f1a0b" + integrity sha512-lvAEYW8mB8QblL6Q/PI/wMzKNvIrF7Kpujf/4fGS/32a2i3jzUXi04TNyIBcK6dQJ34IgywfaKGh+Jq4HYPFmg== dependencies: - normalize-package-data "^3.0.2" - npm-package-arg "^8.1.2" - npm-registry-fetch "^11.0.0" - semver "^7.1.3" - ssri "^8.0.1" + normalize-package-data "^4.0.0" + npm-package-arg "^9.0.1" + npm-registry-fetch "^13.0.0" + semver "^7.3.7" + ssri "^9.0.0" liftoff@^3.1.0: version "3.1.0" @@ -6257,11 +6394,6 @@ locate-path@^6.0.0: dependencies: p-locate "^5.0.0" -lodash._reinterpolate@^3.0.0: - version "3.0.0" - resolved "https://registry.yarnpkg.com/lodash._reinterpolate/-/lodash._reinterpolate-3.0.0.tgz#0ccf2d89166af03b3663c796538b75ac6e114d9d" - integrity sha512-xYHt68QRoYGjeeM/XOE1uJtvXQAgvszfBhjV4yvsQH0u2i9I6cI6c6/eG4Hh3UAOVn0y/xAXwmTzEay49Q//HA== - lodash.camelcase@^4.3.0: version "4.3.0" resolved "https://registry.yarnpkg.com/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz#b28aa6288a2b9fc651035c7711f65ab6190331a6" @@ -6297,26 +6429,19 @@ lodash.some@^4.2.2: resolved "https://registry.yarnpkg.com/lodash.some/-/lodash.some-4.6.0.tgz#1bb9f314ef6b8baded13b549169b2a945eb68e4d" integrity sha512-j7MJE+TuT51q9ggt4fSgVqro163BEFjAt3u97IqU+JA2DkWl80nFTrowzLpZ/BnpN7rrl0JA/593NAdd8p/scQ== -lodash.template@^4.5.0: - version "4.5.0" - resolved "https://registry.yarnpkg.com/lodash.template/-/lodash.template-4.5.0.tgz#f976195cf3f347d0d5f52483569fe8031ccce8ab" - integrity sha512-84vYFxIkmidUiFxidA/KjjH9pAycqW+h980j7Fuz5qxRtO9pgB7MDFTdys1N7A5mcucRiDyEq4fusljItR1T/A== - dependencies: - lodash._reinterpolate "^3.0.0" - lodash.templatesettings "^4.0.0" - -lodash.templatesettings@^4.0.0: - version "4.2.0" - resolved "https://registry.yarnpkg.com/lodash.templatesettings/-/lodash.templatesettings-4.2.0.tgz#e481310f049d3cf6d47e912ad09313b154f0fb33" - integrity sha512-stgLz+i3Aa9mZgnjr/O+v9ruKZsPsndy7qPZOchbqk2cnTU1ZaldKK+v7m54WoKIyxiuMZTKT2H81F8BeAc3ZQ== - dependencies: - lodash._reinterpolate "^3.0.0" - -lodash@^4.17.15, lodash@^4.17.19, lodash@^4.17.20, lodash@^4.17.21, lodash@^4.17.4, lodash@^4.7.0: +lodash@^4.17.15, lodash@^4.17.20, lodash@^4.17.21, lodash@^4.17.4, lodash@^4.7.0: version "4.17.21" resolved 
"https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c" integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg== +log-symbols@^4.1.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/log-symbols/-/log-symbols-4.1.0.tgz#3fbdbb95b4683ac9fc785111e792e558d4abd503" + integrity sha512-8XPvpAA8uyhfteu8pIvQxpJZ7SYYdpUivZpGy6sFsBuKRY/7rQGavedeB8aK+Zkyq6upMFVL/9AW6vOYzfRyLg== + dependencies: + chalk "^4.1.0" + is-unicode-supported "^0.1.0" + log-update@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/log-update/-/log-update-4.0.0.tgz#589ecd352471f2a1c0c570287543a64dfd20e0a1" @@ -6335,9 +6460,9 @@ lru-cache@^6.0.0: yallist "^4.0.0" lru-cache@^7.4.4, lru-cache@^7.5.1, lru-cache@^7.7.1: - version "7.10.1" - resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-7.10.1.tgz#db577f42a94c168f676b638d15da8fb073448cab" - integrity sha512-BQuhQxPuRl79J5zSXRP+uNzPOyZw2oFI9JLRQ80XswSvg21KMKNtQza9eF42rfI/3Z40RvzBdXgziEkudzjo8A== + version "7.13.2" + resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-7.13.2.tgz#bb5d3f1deea3f3a7a35c1c44345566a612e09cd0" + integrity sha512-VJL3nIpA79TodY/ctmZEfhASgqekbT574/c4j3jn4bKXbSCnTTCH/KltZyvL2GlV+tGSMtsWyem8DCX7qKTMBA== lru-queue@^0.1.0: version "0.1.0" @@ -6371,10 +6496,10 @@ make-error@1.x, make-error@^1.1.1: resolved "https://registry.yarnpkg.com/make-error/-/make-error-1.3.6.tgz#2eb2e37ea9b67c4891f684a1394799af484cf7a2" integrity sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw== -make-fetch-happen@^10.0.6: - version "10.1.6" - resolved "https://registry.yarnpkg.com/make-fetch-happen/-/make-fetch-happen-10.1.6.tgz#22b3ac3b077a7cfa80525af12e637e349f21d26e" - integrity sha512-/iKDlRQF0fkxyB/w/duW2yRYrGwBcbJjC37ijgi0CmOZ32bzMc86BCSSAHWvuyRFCB408iBPziTSzazBSrKo3w== +make-fetch-happen@^10.0.3, make-fetch-happen@^10.0.6: + version "10.2.0" + resolved "https://registry.yarnpkg.com/make-fetch-happen/-/make-fetch-happen-10.2.0.tgz#0bde3914f2f82750b5d48c6d2294d2c74f985e5b" + integrity sha512-OnEfCLofQVJ5zgKwGk55GaqosqKjaR6khQlJY3dBAA+hM25Bc5CmX5rKUfVut+rYA3uidA7zb7AvcglU87rPRg== dependencies: agentkeepalive "^4.2.1" cacache "^16.1.0" @@ -6390,52 +6515,9 @@ make-fetch-happen@^10.0.6: minipass-pipeline "^1.2.4" negotiator "^0.6.3" promise-retry "^2.0.1" - socks-proxy-agent "^6.1.1" + socks-proxy-agent "^7.0.0" ssri "^9.0.0" -make-fetch-happen@^8.0.9: - version "8.0.14" - resolved "https://registry.yarnpkg.com/make-fetch-happen/-/make-fetch-happen-8.0.14.tgz#aaba73ae0ab5586ad8eaa68bd83332669393e222" - integrity sha512-EsS89h6l4vbfJEtBZnENTOFk8mCRpY5ru36Xe5bcX1KYIli2mkSHqoFsp5O1wMDvTJJzxe/4THpCTtygjeeGWQ== - dependencies: - agentkeepalive "^4.1.3" - cacache "^15.0.5" - http-cache-semantics "^4.1.0" - http-proxy-agent "^4.0.1" - https-proxy-agent "^5.0.0" - is-lambda "^1.0.1" - lru-cache "^6.0.0" - minipass "^3.1.3" - minipass-collect "^1.0.2" - minipass-fetch "^1.3.2" - minipass-flush "^1.0.5" - minipass-pipeline "^1.2.4" - promise-retry "^2.0.1" - socks-proxy-agent "^5.0.0" - ssri "^8.0.0" - -make-fetch-happen@^9.0.1, make-fetch-happen@^9.1.0: - version "9.1.0" - resolved "https://registry.yarnpkg.com/make-fetch-happen/-/make-fetch-happen-9.1.0.tgz#53085a09e7971433e6765f7971bf63f4e05cb968" - integrity sha512-+zopwDy7DNknmwPQplem5lAZX/eCOzSvSNNcSKm5eVwTkOBzoktEfXsa9L23J/GIRhxRsaxzkPEhrJEpE2F4Gg== - dependencies: - agentkeepalive "^4.1.3" - cacache "^15.2.0" - http-cache-semantics "^4.1.0" - http-proxy-agent 
"^4.0.1" - https-proxy-agent "^5.0.0" - is-lambda "^1.0.1" - lru-cache "^6.0.0" - minipass "^3.1.3" - minipass-collect "^1.0.2" - minipass-fetch "^1.3.2" - minipass-flush "^1.0.5" - minipass-pipeline "^1.2.4" - negotiator "^0.6.2" - promise-retry "^2.0.1" - socks-proxy-agent "^6.0.0" - ssri "^8.0.0" - make-iterator@^1.0.0: version "1.0.1" resolved "https://registry.yarnpkg.com/make-iterator/-/make-iterator-1.0.1.tgz#29b33f312aa8f547c4a5e490f56afcec99133ad6" @@ -6472,10 +6554,10 @@ map-visit@^1.0.0: dependencies: object-visit "^1.0.0" -marked@^4.0.16: - version "4.0.16" - resolved "https://registry.yarnpkg.com/marked/-/marked-4.0.16.tgz#9ec18fc1a723032eb28666100344d9428cf7a264" - integrity sha512-wahonIQ5Jnyatt2fn8KqF/nIqZM8mh3oRu2+l5EANGMhu6RFjiSG52QNE2eWzFMI94HqYSgN184NurgNG6CztA== +marked@^4.0.18: + version "4.0.18" + resolved "https://registry.yarnpkg.com/marked/-/marked-4.0.18.tgz#cd0ac54b2e5610cfb90e8fd46ccaa8292c9ed569" + integrity sha512-wbLDJ7Zh0sqA0Vdg6aqlbT+yPxqLblpAZh1mK2+AO2twQkPywvvqQNfEPVwSSRjZ7dZcdeVBIAgiO7MMp3Dszw== matchdep@^2.0.0: version "2.0.0" @@ -6492,12 +6574,12 @@ math-random@^1.0.1: resolved "https://registry.yarnpkg.com/math-random/-/math-random-1.0.4.tgz#5dd6943c938548267016d4e34f057583080c514c" integrity sha512-rUxjysqif/BZQH2yhd5Aaq7vXMSx9NdEsQcyA07uEzIvxgI7zIr33gGsh+RU0/XjmQpCW7RsVof1vlkvQVCK5A== -memfs@3.4.4: - version "3.4.4" - resolved "https://registry.yarnpkg.com/memfs/-/memfs-3.4.4.tgz#e8973cd8060548916adcca58a248e7805c715e89" - integrity sha512-W4gHNUE++1oSJVn8Y68jPXi+mkx3fXR5ITE/Ubz6EQ3xRpCN5k2CQ4AUR8094Z7211F876TyoBACGsIveqgiGA== +memfs@3.4.7: + version "3.4.7" + resolved "https://registry.yarnpkg.com/memfs/-/memfs-3.4.7.tgz#e5252ad2242a724f938cb937e3c4f7ceb1f70e5a" + integrity sha512-ygaiUSNalBX85388uskeCyhSAoOSgzBbtVCr9jA2RROssFL9Q19/ZXFqS+2Th2sr1ewNIWgFdLzLC3Yl1Zv+lw== dependencies: - fs-monkey "1.0.3" + fs-monkey "^1.0.3" memoizee@0.4.X: version "0.4.15" @@ -6521,10 +6603,10 @@ memory-fs@^0.5.0: errno "^0.1.3" readable-stream "^2.0.1" -meow@^10.1.0: - version "10.1.2" - resolved "https://registry.yarnpkg.com/meow/-/meow-10.1.2.tgz#62951cb69afa69594142c8250806bc30a3912e4d" - integrity sha512-zbuAlN+V/sXlbGchNS9WTWjUzeamwMt/BApKCJi7B0QyZstZaMx0n4Unll/fg0njGtMdC9UP5SAscvOCLYdM+Q== +meow@^10.1.3: + version "10.1.3" + resolved "https://registry.yarnpkg.com/meow/-/meow-10.1.3.tgz#21689959a7d00e8901aff30d208acb2122eb8088" + integrity sha512-0WL7RMCPPdUTE00+GxJjL4d5Dm6eUbmAzxlzywJWiRUKCW093owmZ7/q74tH9VI91vxw9KJJNxAcvdpxb2G4iA== dependencies: "@types/minimist" "^1.2.2" camelcase-keys "^7.0.0" @@ -6615,6 +6697,13 @@ min-indent@^1.0.0, min-indent@^1.0.1: resolved "https://registry.yarnpkg.com/min-indent/-/min-indent-1.0.1.tgz#a63f681673b30571fbe8bc25686ae746eefa9869" integrity sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg== +minimatch@3.0.5: + version "3.0.5" + resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.0.5.tgz#4da8f1290ee0f0f8e83d60ca69f8f134068604a3" + integrity sha512-tUpxzX0VAzJHjLu0xUfFv1gwVp9ba3IOuRAVH2EGuRW8a5emA2FlACLqiT/lDVtS1W+TGNwqz3sWaNyLgDJWuw== + dependencies: + brace-expansion "^1.1.7" + minimatch@^3.0.4, minimatch@^3.1.1, minimatch@^3.1.2: version "3.1.2" resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.1.2.tgz#19cd194bfd3e428f049a70817c038d89ab4be35b" @@ -6638,7 +6727,7 @@ minimist-options@4.1.0: is-plain-obj "^1.1.0" kind-of "^6.0.3" -minimist@1.x, minimist@^1.2.0, minimist@^1.2.5: +minimist@1.x, minimist@^1.2.0, minimist@^1.2.5, minimist@^1.2.6: version 
"1.2.6" resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.6.tgz#8637a5b759ea0d6e98702cfb3a9283323c93af44" integrity sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q== @@ -6650,17 +6739,6 @@ minipass-collect@^1.0.2: dependencies: minipass "^3.0.0" -minipass-fetch@^1.3.0, minipass-fetch@^1.3.2: - version "1.4.1" - resolved "https://registry.yarnpkg.com/minipass-fetch/-/minipass-fetch-1.4.1.tgz#d75e0091daac1b0ffd7e9d41629faff7d0c1f1b6" - integrity sha512-CGH1eblLq26Y15+Azk7ey4xh0J/XfJfrCox5LDJiKqI2Q2iwOLOKrlmIaODiSQS8d18jalF6y2K2ePUm0CmShw== - dependencies: - minipass "^3.1.0" - minipass-sized "^1.0.3" - minizlib "^2.0.0" - optionalDependencies: - encoding "^0.1.12" - minipass-fetch@^2.0.3: version "2.1.0" resolved "https://registry.yarnpkg.com/minipass-fetch/-/minipass-fetch-2.1.0.tgz#ca1754a5f857a3be99a9271277246ac0b44c3ff8" @@ -6687,7 +6765,7 @@ minipass-json-stream@^1.0.1: jsonparse "^1.3.1" minipass "^3.0.0" -minipass-pipeline@^1.2.2, minipass-pipeline@^1.2.4: +minipass-pipeline@^1.2.4: version "1.2.4" resolved "https://registry.yarnpkg.com/minipass-pipeline/-/minipass-pipeline-1.2.4.tgz#68472f79711c084657c067c5c6ad93cddea8214c" integrity sha512-xuIq7cIOt09RPRJ19gdi4b+RiNvDFYe5JH+ggNvBqGqpQXcru3PcRmOZuHBKWK1Txf9+cQ+HMVN4d6z46LZP7A== @@ -6701,14 +6779,14 @@ minipass-sized@^1.0.3: dependencies: minipass "^3.0.0" -minipass@^3.0.0, minipass@^3.1.0, minipass@^3.1.1, minipass@^3.1.3, minipass@^3.1.6: - version "3.1.6" - resolved "https://registry.yarnpkg.com/minipass/-/minipass-3.1.6.tgz#3b8150aa688a711a1521af5e8779c1d3bb4f45ee" - integrity sha512-rty5kpw9/z8SX9dmxblFA6edItUmwJgMeYDZRrwlIVN27i8gysGbznJwUggw2V/FVqFSDdWy040ZPS811DYAqQ== +minipass@^3.0.0, minipass@^3.1.1, minipass@^3.1.6: + version "3.3.4" + resolved "https://registry.yarnpkg.com/minipass/-/minipass-3.3.4.tgz#ca99f95dd77c43c7a76bf51e6d200025eee0ffae" + integrity sha512-I9WPbWHCGu8W+6k1ZiGpPu0GkoKBeorkfKNuAFBNS1HNFJvke82sxvI5bzcCNpWPorkOO5QQ+zomzzwRxejXiw== dependencies: yallist "^4.0.0" -minizlib@^2.0.0, minizlib@^2.1.1, minizlib@^2.1.2: +minizlib@^2.1.1, minizlib@^2.1.2: version "2.1.2" resolved "https://registry.yarnpkg.com/minizlib/-/minizlib-2.1.2.tgz#e90d3466ba209b932451508a11ce3d3632145931" integrity sha512-bAxsR8BVfj60DWXHE3u30oHzfl4G7khkSuPW+qvpd7jFRHm7dLxOjUk1EHACJ/hxLY8phGJ0YhYHZo7jil7Qdg== @@ -6744,9 +6822,9 @@ modify-values@^1.0.0: integrity sha512-xV2bxeN6F7oYjZWTe/YPAy6MN2M+sL4u/Rlm2AHCIVGfo2p1yGmBHQ6vHehl4bRTZBdHu3TSkWdYgkwpYzAGSw== mrmime@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/mrmime/-/mrmime-1.0.0.tgz#14d387f0585a5233d291baba339b063752a2398b" - integrity sha512-a70zx7zFfVO7XpnQ2IX1Myh9yY4UYvfld/dikWRnsXxbyvMcfz+u6UfgNAtH+k2QqtJuzVpv6eLTx1G2+WKZbQ== + version "1.0.1" + resolved "https://registry.yarnpkg.com/mrmime/-/mrmime-1.0.1.tgz#5f90c825fad4bdd41dc914eff5d1a8cfdaf24f27" + integrity sha512-hzzEagAgDyoU1Q6yg5uI+AorQgdvMCur3FcKf7NhMKWsaYg+RnbTyHRa/9IlLF9rf455MOCtcqqrQQ83pPP7Uw== ms@2.0.0: version "2.0.0" @@ -6819,7 +6897,7 @@ natural-compare@^1.4.0: resolved "https://registry.yarnpkg.com/natural-compare/-/natural-compare-1.4.0.tgz#4abebfeed7541f2c27acfb29bdbbd15c8d5ba4f7" integrity sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw== -negotiator@^0.6.2, negotiator@^0.6.3: +negotiator@^0.6.3: version "0.6.3" resolved "https://registry.yarnpkg.com/negotiator/-/negotiator-0.6.3.tgz#58e323a72fedc0d6f9cd4d31fe49f51479590ccd" integrity 
sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg== @@ -6834,6 +6912,11 @@ next-tick@1, next-tick@^1.1.0: resolved "https://registry.yarnpkg.com/next-tick/-/next-tick-1.1.0.tgz#1836ee30ad56d67ef281b22bd199f709449b35eb" integrity sha512-CXdUiJembsNjuToQvxayPZF9Vqht7hewsvy2sOWafLvi2awflj9mOC6bHIg50orX8IJvWKY9wYQ/zB2kogPslQ== +node-addon-api@^3.2.1: + version "3.2.1" + resolved "https://registry.yarnpkg.com/node-addon-api/-/node-addon-api-3.2.1.tgz#81325e0a2117789c0128dab65e7e38f07ceba161" + integrity sha512-mmcei9JghVNDYydghQmeDX8KoAm0FAiYyIcUt/N4nhyAipB17pllZQDOJD2fotxABnt4Mdz+dKTO7eftLg4d0A== + node-fetch@^2.6.1, node-fetch@^2.6.7: version "2.6.7" resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.7.tgz#24de9fba827e3b4ae44dc8b20256a379160052ad" @@ -6841,15 +6924,20 @@ node-fetch@^2.6.1, node-fetch@^2.6.7: dependencies: whatwg-url "^5.0.0" -node-gyp@^8.4.1: - version "8.4.1" - resolved "https://registry.yarnpkg.com/node-gyp/-/node-gyp-8.4.1.tgz#3d49308fc31f768180957d6b5746845fbd429937" - integrity sha512-olTJRgUtAb/hOXG0E93wZDs5YiJlgbXxTwQAFHyNlRsXQnYzUaF2aGgujZbw+hR8aF4ZG/rST57bWMWD16jr9w== +node-gyp-build@^4.3.0: + version "4.5.0" + resolved "https://registry.yarnpkg.com/node-gyp-build/-/node-gyp-build-4.5.0.tgz#7a64eefa0b21112f89f58379da128ac177f20e40" + integrity sha512-2iGbaQBV+ITgCz76ZEjmhUKAKVf7xfY1sRl4UiKQspfZMH2h06SyhNsnSVy50cwkFQDGLyif6m/6uFXHkOZ6rg== + +node-gyp@^9.0.0: + version "9.1.0" + resolved "https://registry.yarnpkg.com/node-gyp/-/node-gyp-9.1.0.tgz#c8d8e590678ea1f7b8097511dedf41fc126648f8" + integrity sha512-HkmN0ZpQJU7FLbJauJTHkHlSVAXlNGDAzH/VYFZGDOnFyn/Na3GlNJfkudmufOdS6/jNFhy88ObzL7ERz9es1g== dependencies: env-paths "^2.2.0" glob "^7.1.4" graceful-fs "^4.2.6" - make-fetch-happen "^9.1.0" + make-fetch-happen "^10.0.3" nopt "^5.0.0" npmlog "^6.0.0" rimraf "^3.0.2" @@ -6860,12 +6948,12 @@ node-gyp@^8.4.1: node-int64@^0.4.0: version "0.4.0" resolved "https://registry.yarnpkg.com/node-int64/-/node-int64-0.4.0.tgz#87a9065cdb355d3182d8f94ce11188b825c68a3b" - integrity sha1-h6kGXNs1XTGC2PlM4RGIuCXGijs= + integrity sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw== -node-releases@^2.0.3: - version "2.0.5" - resolved "https://registry.yarnpkg.com/node-releases/-/node-releases-2.0.5.tgz#280ed5bc3eba0d96ce44897d8aee478bfb3d9666" - integrity sha512-U9h1NLROZTq9uE1SNffn6WuPDg8icmi3ns4rEl/oTfIle4iLjTliCzgTsbaIFMq/Xn078/lfY/BL0GWZ+psK4Q== +node-releases@^2.0.6: + version "2.0.6" + resolved "https://registry.yarnpkg.com/node-releases/-/node-releases-2.0.6.tgz#8a7088c63a55e493845683ebf3c828d8c51c5503" + integrity sha512-PiVXnNuFm5+iYkLBNeq5211hvO38y63T0i2KKh2KnUs3RpzJ+JtODFjkD8yjLwnDkTYF1eKXheUwdssR+NRZdg== nopt@^5.0.0: version "5.0.0" @@ -6907,11 +6995,11 @@ normalize-package-data@^4.0.0: normalize-path@^2.0.1, normalize-path@^2.1.1: version "2.1.1" resolved "https://registry.yarnpkg.com/normalize-path/-/normalize-path-2.1.1.tgz#1ab28b556e198363a8c1a6f7e6fa20137fe6aed9" - integrity sha1-GrKLVW4Zg2Oowab35vogE3/mrtk= + integrity sha512-3pKJwH184Xo/lnH6oyP1q2pMd7HcypqqmRs91/6/i2CGtWwIKGCkOOMTm/zXbgTEWHw1uNpNi/igc3ePOYHb6w== dependencies: remove-trailing-separator "^1.0.1" -normalize-path@^3.0.0: +normalize-path@^3.0.0, normalize-path@~3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/normalize-path/-/normalize-path-3.0.0.tgz#0dcd69ff23a1c9b11fd0978316644a0388216a65" integrity sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA== @@ 
-6947,38 +7035,29 @@ npm-normalize-package-bin@^1.0.0, npm-normalize-package-bin@^1.0.1: resolved "https://registry.yarnpkg.com/npm-normalize-package-bin/-/npm-normalize-package-bin-1.0.1.tgz#6e79a41f23fd235c0623218228da7d9c23b8f6e2" integrity sha512-EPfafl6JL5/rU+ot6P3gRSCpPDW5VmIzX959Ob1+ySFUuuYHWHekXpwdUZcKP5C+DS4GEtdJluwBjnsNDl+fSA== -npm-package-arg@^8.0.0, npm-package-arg@^8.1.0, npm-package-arg@^8.1.2, npm-package-arg@^8.1.5: - version "8.1.5" - resolved "https://registry.yarnpkg.com/npm-package-arg/-/npm-package-arg-8.1.5.tgz#3369b2d5fe8fdc674baa7f1786514ddc15466e44" - integrity sha512-LhgZrg0n0VgvzVdSm1oiZworPbTxYHUJCgtsJW8mGvlDpxTM1vSJc3m5QZeUkhAHIzbz3VCHd/R4osi1L1Tg/Q== +npm-package-arg@8.1.1: + version "8.1.1" + resolved "https://registry.yarnpkg.com/npm-package-arg/-/npm-package-arg-8.1.1.tgz#00ebf16ac395c63318e67ce66780a06db6df1b04" + integrity sha512-CsP95FhWQDwNqiYS+Q0mZ7FAEDytDZAkNxQqea6IaAFJTAY9Lhhqyl0irU/6PMc7BGfUmnsbHcqxJD7XuVM/rg== dependencies: - hosted-git-info "^4.0.1" - semver "^7.3.4" + hosted-git-info "^3.0.6" + semver "^7.0.0" validate-npm-package-name "^3.0.0" npm-package-arg@^9.0.0, npm-package-arg@^9.0.1: - version "9.0.2" - resolved "https://registry.yarnpkg.com/npm-package-arg/-/npm-package-arg-9.0.2.tgz#f3ef7b1b3b02e82564af2d5228b4c36567dcd389" - integrity sha512-v/miORuX8cndiOheW8p2moNuPJ7QhcFh9WGlTorruG8hXSA23vMTEp5hTCmDxic0nD8KHhj/NQgFuySD3GYY3g== + version "9.1.0" + resolved "https://registry.yarnpkg.com/npm-package-arg/-/npm-package-arg-9.1.0.tgz#a60e9f1e7c03e4e3e4e994ea87fff8b90b522987" + integrity sha512-4J0GL+u2Nh6OnhvUKXRr2ZMG4lR8qtLp+kv7UiV00Y+nGiSxtttCyIRHCt5L5BNkXQld/RceYItau3MDOoGiBw== dependencies: hosted-git-info "^5.0.0" + proc-log "^2.0.1" semver "^7.3.5" validate-npm-package-name "^4.0.0" -npm-packlist@^2.1.4: - version "2.2.2" - resolved "https://registry.yarnpkg.com/npm-packlist/-/npm-packlist-2.2.2.tgz#076b97293fa620f632833186a7a8f65aaa6148c8" - integrity sha512-Jt01acDvJRhJGthnUJVF/w6gumWOZxO7IkpY/lsX9//zqQgnF7OJaxgQXcerd4uQOLu7W5bkb4mChL9mdfm+Zg== - dependencies: - glob "^7.1.6" - ignore-walk "^3.0.3" - npm-bundled "^1.1.1" - npm-normalize-package-bin "^1.0.1" - -npm-packlist@^5.1.0: - version "5.1.0" - resolved "https://registry.yarnpkg.com/npm-packlist/-/npm-packlist-5.1.0.tgz#f3fd52903a021009913a133732022132eb355ce7" - integrity sha512-a04sqF6FbkyOAFA19AA0e94gS7Et5T2/IMj3VOT9nOF2RaRdVPQ1Q17Fb/HaDRFs+gbC7HOmhVZ29adpWgmDZg== +npm-packlist@^5.1.0, npm-packlist@^5.1.1: + version "5.1.1" + resolved "https://registry.yarnpkg.com/npm-packlist/-/npm-packlist-5.1.1.tgz#79bcaf22a26b6c30aa4dd66b976d69cc286800e0" + integrity sha512-UfpSvQ5YKwctmodvPPkK6Fwk603aoVsf8AEbmVKAEECrfvL8SSe1A2YIwrJ6xmTHAITKPwwZsWo7WwEbNk0kxw== dependencies: glob "^8.0.1" ignore-walk "^5.0.1" @@ -6995,22 +7074,10 @@ npm-pick-manifest@^7.0.0: npm-package-arg "^9.0.0" semver "^7.3.5" -npm-registry-fetch@^11.0.0: - version "11.0.0" - resolved "https://registry.yarnpkg.com/npm-registry-fetch/-/npm-registry-fetch-11.0.0.tgz#68c1bb810c46542760d62a6a965f85a702d43a76" - integrity sha512-jmlgSxoDNuhAtxUIG6pVwwtz840i994dL14FoNVZisrmZW5kWd63IUTNv1m/hyRSGSqWjCUp/YZlS1BJyNp9XA== - dependencies: - make-fetch-happen "^9.0.1" - minipass "^3.1.3" - minipass-fetch "^1.3.0" - minipass-json-stream "^1.0.1" - minizlib "^2.0.0" - npm-package-arg "^8.0.0" - -npm-registry-fetch@^13.0.0, npm-registry-fetch@^13.0.1: - version "13.1.1" - resolved "https://registry.yarnpkg.com/npm-registry-fetch/-/npm-registry-fetch-13.1.1.tgz#26dc4b26d0a545886e807748032ba2aefaaae96b" - integrity 
sha512-5p8rwe6wQPLJ8dMqeTnA57Dp9Ox6GH9H60xkyJup07FmVlu3Mk7pf/kIIpl9gaN5bM8NM+UUx3emUWvDNTt39w== +npm-registry-fetch@^13.0.0, npm-registry-fetch@^13.0.1, npm-registry-fetch@^13.3.0: + version "13.3.0" + resolved "https://registry.yarnpkg.com/npm-registry-fetch/-/npm-registry-fetch-13.3.0.tgz#0ce10fa4a699a1e70685ecf41bbfb4150d74231b" + integrity sha512-10LJQ/1+VhKrZjIuY9I/+gQTvumqqlgnsCufoXETHAPFTS3+M+Z5CFhZRDHGavmJ6rOye3UvNga88vl8n1r6gg== dependencies: make-fetch-happen "^10.0.6" minipass "^3.1.6" @@ -7020,20 +7087,6 @@ npm-registry-fetch@^13.0.0, npm-registry-fetch@^13.0.1: npm-package-arg "^9.0.1" proc-log "^2.0.0" -npm-registry-fetch@^9.0.0: - version "9.0.0" - resolved "https://registry.yarnpkg.com/npm-registry-fetch/-/npm-registry-fetch-9.0.0.tgz#86f3feb4ce00313bc0b8f1f8f69daae6face1661" - integrity sha512-PuFYYtnQ8IyVl6ib9d3PepeehcUeHN9IO5N/iCRhyg9tStQcqGQBRVHmfmMWPDERU3KwZoHFvbJ4FPXPspvzbA== - dependencies: - "@npmcli/ci-detect" "^1.0.0" - lru-cache "^6.0.0" - make-fetch-happen "^8.0.9" - minipass "^3.1.3" - minipass-fetch "^1.3.0" - minipass-json-stream "^1.0.1" - minizlib "^2.0.0" - npm-package-arg "^8.0.0" - npm-run-path@^4.0.1: version "4.0.1" resolved "https://registry.yarnpkg.com/npm-run-path/-/npm-run-path-4.0.1.tgz#b7ecd1e5ed53da8e37a55e1c2269e0b97ed748ea" @@ -7041,16 +7094,6 @@ npm-run-path@^4.0.1: dependencies: path-key "^3.0.0" -npmlog@^4.1.2: - version "4.1.2" - resolved "https://registry.yarnpkg.com/npmlog/-/npmlog-4.1.2.tgz#08a7f2a8bf734604779a9efa4ad5cc717abb954b" - integrity sha512-2uUqazuKlTaSI/dC8AzicUck7+IrEaOnN/e0jd3Xtt1KcGpwx30v50mL7oPyr/h9bL3E4aZccVwpwP+5W9Vjkg== - dependencies: - are-we-there-yet "~1.1.2" - console-control-strings "~1.1.0" - gauge "~2.7.3" - set-blocking "~2.0.0" - npmlog@^6.0.0, npmlog@^6.0.2: version "6.0.2" resolved "https://registry.yarnpkg.com/npmlog/-/npmlog-6.0.2.tgz#c8166017a42f2dea92d6453168dd865186a70830" @@ -7064,27 +7107,58 @@ npmlog@^6.0.0, npmlog@^6.0.2: number-is-nan@^1.0.0: version "1.0.1" resolved "https://registry.yarnpkg.com/number-is-nan/-/number-is-nan-1.0.1.tgz#097b602b53422a522c1afb8790318336941a011d" - integrity sha1-CXtgK1NCKlIsGvuHkDGDNpQaAR0= + integrity sha512-4jbtZXNAsfZbAHiiqjLPBiCl16dES1zI4Hpzzxw61Tk+loF+sBDBKx1ICKKKwIqQ7M0mFn1TmkN7euSncWgHiQ== + +nx@14.5.4, "nx@>=14.4.3 < 16": + version "14.5.4" + resolved "https://registry.yarnpkg.com/nx/-/nx-14.5.4.tgz#58b6e8ee798733a6ab9aff2a19180c371482fa10" + integrity sha512-xv1nTaQP6kqVDE4PXcB1tLlgzNAPUHE/2vlqSLgxjNb6colKf0vrEZhVTjhnbqBeJiTb33gUx50bBXkurCkN5w== + dependencies: + "@nrwl/cli" "14.5.4" + "@nrwl/tao" "14.5.4" + "@parcel/watcher" "2.0.4" + chalk "4.1.0" + chokidar "^3.5.1" + cli-cursor "3.1.0" + cli-spinners "2.6.1" + cliui "^7.0.2" + dotenv "~10.0.0" + enquirer "~2.3.6" + fast-glob "3.2.7" + figures "3.2.0" + flat "^5.0.2" + fs-extra "^10.1.0" + glob "7.1.4" + ignore "^5.0.4" + js-yaml "4.1.0" + jsonc-parser "3.0.0" + minimatch "3.0.5" + npm-run-path "^4.0.1" + open "^8.4.0" + semver "7.3.4" + string-width "^4.2.3" + tar-stream "~2.2.0" + tmp "~0.2.1" + tsconfig-paths "^3.9.0" + tslib "^2.3.0" + v8-compile-cache "2.3.0" + yargs "^17.4.0" + yargs-parser "21.0.1" -object-assign@4.X, object-assign@^4.0.1, object-assign@^4.1.0: +object-assign@4.X, object-assign@^4.0.1: version "4.1.1" resolved "https://registry.yarnpkg.com/object-assign/-/object-assign-4.1.1.tgz#2109adc7965887cfc05cbbd442cac8bfbb360863" - integrity sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM= + integrity sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg== 
object-copy@^0.1.0: version "0.1.0" resolved "https://registry.yarnpkg.com/object-copy/-/object-copy-0.1.0.tgz#7e7d858b781bd7c991a41ba975ed3812754e998c" - integrity sha1-fn2Fi3gb18mRpBupde04EnVOmYw= + integrity sha512-79LYn6VAb63zgtmAteVOWo9Vdj71ZVBy3Pbse+VqxDpEP83XuujMrGqHIwAXJ5I/aM0zU7dIyIAhifVTPrNItQ== dependencies: copy-descriptor "^0.1.0" define-property "^0.2.5" kind-of "^3.0.3" -object-inspect@^1.9.0: - version "1.12.2" - resolved "https://registry.yarnpkg.com/object-inspect/-/object-inspect-1.12.2.tgz#c0641f26394532f28ab8d796ab954e43c009a8ea" - integrity sha512-z+cPxW0QGUp0mcqcsgQyLVRDoXFQbXOwBaqyF7VIgI4TWNQsDHrBpUQslRmIfAoYWdYzs6UlKJtB2XJpTaNSpQ== - object-keys@^1.1.1: version "1.1.1" resolved "https://registry.yarnpkg.com/object-keys/-/object-keys-1.1.1.tgz#1c47f272df277f3b1daf061677d9c82e2322c60e" @@ -7093,7 +7167,7 @@ object-keys@^1.1.1: object-visit@^1.0.0: version "1.0.1" resolved "https://registry.yarnpkg.com/object-visit/-/object-visit-1.0.1.tgz#f79c4493af0c5377b59fe39d395e41042dd045bb" - integrity sha1-95xEk68MU3e1n+OdOV5BBC3QRbs= + integrity sha512-GBaMwwAVK9qbQN3Scdo0OyvgPW7l3lnaVMj84uTOZlswkX0KpF6fyDBJhtTthf7pymztoN36/KEr1DyhF96zEA== dependencies: isobject "^3.0.0" @@ -7110,7 +7184,7 @@ object.assign@^4.0.4, object.assign@^4.1.0: object.defaults@^1.0.0, object.defaults@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/object.defaults/-/object.defaults-1.1.0.tgz#3a7f868334b407dea06da16d88d5cd29e435fecf" - integrity sha1-On+GgzS0B96gbaFtiNXNKeQ1/s8= + integrity sha512-c/K0mw/F11k4dEUBMW8naXUuBuhxRCfG7W+yFy8EcijU/rSmazOUd1XAEEe6bC0OuXY4HUKjTJv7xbxIMqdxrA== dependencies: array-each "^1.0.1" array-slice "^1.0.0" @@ -7120,7 +7194,7 @@ object.defaults@^1.0.0, object.defaults@^1.1.0: object.map@^1.0.0: version "1.0.1" resolved "https://registry.yarnpkg.com/object.map/-/object.map-1.0.1.tgz#cf83e59dc8fcc0ad5f4250e1f78b3b81bd801d37" - integrity sha1-z4Plncj8wK1fQlDh94s7gb2AHTc= + integrity sha512-3+mAJu2PLfnSVGHwIWubpOFLscJANBKuB/6A4CxBstc4aqwQY0FWcsppuy4jU5GSB95yES5JHSI+33AWuS4k6w== dependencies: for-own "^1.0.0" make-iterator "^1.0.0" @@ -7128,14 +7202,14 @@ object.map@^1.0.0: object.pick@^1.2.0, object.pick@^1.3.0: version "1.3.0" resolved "https://registry.yarnpkg.com/object.pick/-/object.pick-1.3.0.tgz#87a10ac4c1694bd2e1cbf53591a66141fb5dd747" - integrity sha1-h6EKxMFpS9Lhy/U1kaZhQftd10c= + integrity sha512-tqa/UMy/CCoYmj+H5qc07qvSL9dqcs/WZENZ1JbtWBlATP+iVOe778gE6MSijnyCnORzDuX6hU+LA4SZ09YjFQ== dependencies: isobject "^3.0.1" object.reduce@^1.0.0: version "1.0.1" resolved "https://registry.yarnpkg.com/object.reduce/-/object.reduce-1.0.1.tgz#6fe348f2ac7fa0f95ca621226599096825bb03ad" - integrity sha1-b+NI8qx/oPlcpiEiZZkJaCW7A60= + integrity sha512-naLhxxpUESbNkRqc35oQ2scZSJueHGQNUfMW/0U37IgN6tE2dgDWg3whf+NEliy3F/QysrO48XKUz/nGPe+AQw== dependencies: for-own "^1.0.0" make-iterator "^1.0.0" @@ -7143,7 +7217,7 @@ object.reduce@^1.0.0: once@^1.3.0, once@^1.3.1, once@^1.3.2, once@^1.4.0: version "1.4.0" resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1" - integrity sha1-WDsap3WWHUsROsF9nFC6753Xa9E= + integrity sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w== dependencies: wrappy "1" @@ -7154,6 +7228,15 @@ onetime@^5.1.0, onetime@^5.1.2: dependencies: mimic-fn "^2.1.0" +open@^8.4.0: + version "8.4.0" + resolved "https://registry.yarnpkg.com/open/-/open-8.4.0.tgz#345321ae18f8138f82565a910fdc6b39e8c244f8" + integrity 
sha512-XgFPPM+B28FtCCgSb9I+s9szOC1vZRSwgWsRUA5ylIxRTgKozqjOCrVOqGsYABPYK5qnfqClxZTFBa8PKt2v6Q== + dependencies: + define-lazy-prop "^2.0.0" + is-docker "^2.1.1" + is-wsl "^2.2.0" + opener@^1.5.2: version "1.5.2" resolved "https://registry.yarnpkg.com/opener/-/opener-1.5.2.tgz#5d37e1f35077b9dcac4301372271afdeb2a13598" @@ -7171,29 +7254,44 @@ optionator@^0.9.1: type-check "^0.4.0" word-wrap "^1.2.3" +ora@^5.4.1: + version "5.4.1" + resolved "https://registry.yarnpkg.com/ora/-/ora-5.4.1.tgz#1b2678426af4ac4a509008e5e4ac9e9959db9e18" + integrity sha512-5b6Y85tPxZZ7QytO+BQzysW31HJku27cRIlkbAXaNx+BdcVi+LlRFmVXzeF6a7JCwJpyw5c4b+YSVImQIrBpuQ== + dependencies: + bl "^4.1.0" + chalk "^4.1.0" + cli-cursor "^3.1.0" + cli-spinners "^2.5.0" + is-interactive "^1.0.0" + is-unicode-supported "^0.1.0" + log-symbols "^4.1.0" + strip-ansi "^6.0.0" + wcwidth "^1.0.1" + ordered-read-streams@^1.0.0: version "1.0.1" resolved "https://registry.yarnpkg.com/ordered-read-streams/-/ordered-read-streams-1.0.1.tgz#77c0cb37c41525d64166d990ffad7ec6a0e1363e" - integrity sha1-d8DLN8QVJdZBZtmQ/61+xqDhNj4= + integrity sha512-Z87aSjx3r5c0ZB7bcJqIgIRX5bxR7A4aSzvIbaxd0oTkWBCOoKfuGHiKj60CHVUgg1Phm5yMZzBdt8XqRs73Mw== dependencies: readable-stream "^2.0.1" os-locale@^1.4.0: version "1.4.0" resolved "https://registry.yarnpkg.com/os-locale/-/os-locale-1.4.0.tgz#20f9f17ae29ed345e8bde583b13d2009803c14d9" - integrity sha1-IPnxeuKe00XoveWDsT0gCYA8FNk= + integrity sha512-PRT7ZORmwu2MEFt4/fv3Q+mEfN4zetKxufQrkShY2oGvUms9r8otu5HfdyIFHkYXjO7laNsoVGmM2MANfuTA8g== dependencies: lcid "^1.0.0" os-tmpdir@~1.0.2: version "1.0.2" resolved "https://registry.yarnpkg.com/os-tmpdir/-/os-tmpdir-1.0.2.tgz#bbe67406c79aa85c5cfec766fe5734555dfa1274" - integrity sha1-u+Z0BseaqFxc/sdm/lc0VV36EnQ= + integrity sha512-D2FR03Vir7FIu45XBY20mTb+/ZSWB00sjU9jdQXt83gDrI4Ztz5Fs7/yy74g2N5SVQY4xY1qDr4rNddwYRVX0g== p-finally@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/p-finally/-/p-finally-1.0.0.tgz#3fbcfb15b899a44123b34b6dcc18b724336a2cae" - integrity sha1-P7z7FbiZpEEjs0ttzBi3JDNqLK4= + integrity sha512-LICb2p9CB7FS+0eR1oqWnHhp0FljGLZCWBE9aix0Uye9W8LTQPwMTYVGWQWIw9RdQiDg4+epXQODwIYJtSJaow== p-limit@^1.1.0: version "1.3.0" @@ -7209,7 +7307,7 @@ p-limit@^2.2.0: dependencies: p-try "^2.0.0" -p-limit@^3.0.2: +p-limit@^3.0.2, p-limit@^3.1.0: version "3.1.0" resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-3.1.0.tgz#e1daccbe78d0d1388ca18c64fea38e3e57e3706b" integrity sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ== @@ -7219,7 +7317,7 @@ p-limit@^3.0.2: p-locate@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-2.0.0.tgz#20a0103b222a70c8fd39cc2e580680f3dde5ec43" - integrity sha1-IKAQOyIqcMj9OcwuWAaA893l7EM= + integrity sha512-nQja7m7gSKuewoVRen45CtVfODR3crN3goVQ0DDZ9N3yHxgpkuBhZqsaiotSQRrADUrne346peY7kT3TSACykg== dependencies: p-limit "^1.1.0" @@ -7249,6 +7347,13 @@ p-map@^4.0.0: dependencies: aggregate-error "^3.0.0" +p-map@^5.5.0: + version "5.5.0" + resolved "https://registry.yarnpkg.com/p-map/-/p-map-5.5.0.tgz#054ca8ca778dfa4cf3f8db6638ccb5b937266715" + integrity sha512-VFqfGDHlx87K66yZrNdI4YGtD70IRyd+zSvgks6mzHPRNkoKy+9EKP4SFC77/vTTQYmRmti7dvqC+m5jBrBAcg== + dependencies: + aggregate-error "^4.0.0" + p-pipe@^3.1.0: version "3.1.0" resolved "https://registry.yarnpkg.com/p-pipe/-/p-pipe-3.1.0.tgz#48b57c922aa2e1af6a6404cb7c6bf0eb9cc8e60e" @@ -7277,7 +7382,7 @@ p-timeout@^3.2.0: p-try@^1.0.0: version "1.0.0" resolved 
"https://registry.yarnpkg.com/p-try/-/p-try-1.0.0.tgz#cbc79cdbaf8fd4228e13f621f2b1a237c1b207b3" - integrity sha1-y8ec26+P1CKOE/Yh8rGiN8GyB7M= + integrity sha512-U1etNYuMJoIz3ZXSrrySFjsXQTWOx2/jdi86L+2pRvph/qMKL6sbcCYdH23fqsbm8TH2Gn0OybpT4eSFlCVHww== p-try@^2.0.0: version "2.2.0" @@ -7291,15 +7396,15 @@ p-waterfall@^2.1.1: dependencies: p-reduce "^2.0.0" -pacote@^13.0.3, pacote@^13.0.5, pacote@^13.4.1: - version "13.5.0" - resolved "https://registry.yarnpkg.com/pacote/-/pacote-13.5.0.tgz#e2c745dc320513a98b9403e92b366a1ba6a4db94" - integrity sha512-yekp0ykEsaBH0t0bYA/89R+ywdYV5ZnEdg4YMIfqakSlpIhoF6b8+aEUm8NZpfWRgmy6lxgywcW05URhLRogVQ== +pacote@^13.0.3, pacote@^13.6.1: + version "13.6.1" + resolved "https://registry.yarnpkg.com/pacote/-/pacote-13.6.1.tgz#ac6cbd9032b4c16e5c1e0c60138dfe44e4cc589d" + integrity sha512-L+2BI1ougAPsFjXRyBhcKmfT016NscRFLv6Pz5EiNf1CCFJFU0pSKKQwsZTyAQB+sTuUL4TyFyp6J1Ork3dOqw== dependencies: "@npmcli/git" "^3.0.0" "@npmcli/installed-package-contents" "^1.0.7" "@npmcli/promise-spawn" "^3.0.0" - "@npmcli/run-script" "^3.0.1" + "@npmcli/run-script" "^4.1.0" cacache "^16.0.0" chownr "^2.0.0" fs-minipass "^2.1.0" @@ -7321,7 +7426,7 @@ pacote@^13.0.3, pacote@^13.0.5, pacote@^13.4.1: pad-left@^2.1.0: version "2.1.0" resolved "https://registry.yarnpkg.com/pad-left/-/pad-left-2.1.0.tgz#16e6a3b2d44a8e138cb0838cc7cb403a4fc9e994" - integrity sha1-FuajstRKjhOMsIOMx8tAOk/J6ZQ= + integrity sha512-HJxs9K9AztdIQIAIa/OIazRAUW/L6B9hbQDxO4X07roW3eo9XqZc2ur9bn1StH9CnbbI9EgvejHQX7CBpCF1QA== dependencies: repeat-string "^1.5.4" @@ -7344,7 +7449,7 @@ parse-conflict-json@^2.0.1: parse-filepath@^1.0.1: version "1.0.2" resolved "https://registry.yarnpkg.com/parse-filepath/-/parse-filepath-1.0.2.tgz#a632127f53aaf3d15876f5872f3ffac763d6c891" - integrity sha1-pjISf1Oq89FYdvWHLz/6x2PWyJE= + integrity sha512-FwdRXKCohSVeXqwtYonZTXtbGJKrn+HNyWDYVcp5yuJlesTwNH4rsmRZ+GrKAPJ5bLpRxESMeS+Rl0VCHRvB2Q== dependencies: is-absolute "^1.0.0" map-cache "^0.2.0" @@ -7353,14 +7458,14 @@ parse-filepath@^1.0.1: parse-json@^2.2.0: version "2.2.0" resolved "https://registry.yarnpkg.com/parse-json/-/parse-json-2.2.0.tgz#f480f40434ef80741f8469099f8dea18f55a4dc9" - integrity sha1-9ID0BDTvgHQfhGkJn43qGPVaTck= + integrity sha512-QR/GGaKCkhwk1ePQNYDRKYZ3mwU9ypsKhB0XyFnLQdomyEqk3e8wpW3V5Jp88zbxK4n5ST1nqo+g9juTpownhQ== dependencies: error-ex "^1.2.0" parse-json@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/parse-json/-/parse-json-4.0.0.tgz#be35f5425be1f7f6c747184f98a788cb99477ee0" - integrity sha1-vjX1Qlvh9/bHRxhPmKeIy5lHfuA= + integrity sha512-aOIos8bujGN93/8Ox/jPLh7RwVnPEysynVFE+fQZyg6jKELEHwzgKdLRFHUgXJL6kylijVSBC4BvN9OmsB48Rw== dependencies: error-ex "^1.3.1" json-parse-better-errors "^1.0.1" @@ -7383,49 +7488,46 @@ parse-node-version@^1.0.0: parse-passwd@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/parse-passwd/-/parse-passwd-1.0.0.tgz#6d5b934a456993b23d37f40a382d6f1666a8e5c6" - integrity sha1-bVuTSkVpk7I9N/QKOC1vFmao5cY= + integrity sha512-1Y1A//QUXEZK7YKz+rD9WydcE1+EuPr6ZBgKecAB8tmoW6UFv0NREVJe1p+jRxtThkcbbKkfwIbWJe/IeE6m2Q== -parse-path@^4.0.0: - version "4.0.3" - resolved "https://registry.yarnpkg.com/parse-path/-/parse-path-4.0.3.tgz#82d81ec3e071dcc4ab49aa9f2c9c0b8966bb22bf" - integrity sha512-9Cepbp2asKnWTJ9x2kpw6Fe8y9JDbqwahGCTvklzd/cEq5C5JC59x2Xb0Kx+x0QZ8bvNquGO8/BWP0cwBHzSAA== +parse-path@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/parse-path/-/parse-path-5.0.0.tgz#f933152f3c6d34f4cf36cfc3d07b138ac113649d" + integrity 
sha512-qOpH55/+ZJ4jUu/oLO+ifUKjFPNZGfnPJtzvGzKN/4oLMil5m9OH4VpOj6++9/ytJcfks4kzH2hhi87GL/OU9A== dependencies: - is-ssh "^1.3.0" - protocols "^1.4.0" - qs "^6.9.4" - query-string "^6.13.8" + protocols "^2.0.0" -parse-url@^6.0.0: - version "6.0.0" - resolved "https://registry.yarnpkg.com/parse-url/-/parse-url-6.0.0.tgz#f5dd262a7de9ec00914939220410b66cff09107d" - integrity sha512-cYyojeX7yIIwuJzledIHeLUBVJ6COVLeT4eF+2P6aKVzwvgKQPndCBv3+yQ7pcWjqToYwaligxzSYNNmGoMAvw== +parse-url@^7.0.2: + version "7.0.2" + resolved "https://registry.yarnpkg.com/parse-url/-/parse-url-7.0.2.tgz#d21232417199b8d371c6aec0cedf1406fd6393f0" + integrity sha512-PqO4Z0eCiQ08Wj6QQmrmp5YTTxpYfONdOEamrtvK63AmzXpcavIVQubGHxOEwiIoDZFb8uDOoQFS0NCcjqIYQg== dependencies: - is-ssh "^1.3.0" + is-ssh "^1.4.0" normalize-url "^6.1.0" - parse-path "^4.0.0" - protocols "^1.4.0" + parse-path "^5.0.0" + protocols "^2.0.1" pascalcase@^0.1.1: version "0.1.1" resolved "https://registry.yarnpkg.com/pascalcase/-/pascalcase-0.1.1.tgz#b363e55e8006ca6fe21784d2db22bd15d7917f14" - integrity sha1-s2PlXoAGym/iF4TS2yK9FdeRfxQ= + integrity sha512-XHXfu/yOQRy9vYOtUDVMN60OEJjW013GoObG1o+xwQTpB9eYJX/BjXMsdW13ZDPruFhYYn0AG22w0xgQMwl3Nw== path-dirname@^1.0.0: version "1.0.2" resolved "https://registry.yarnpkg.com/path-dirname/-/path-dirname-1.0.2.tgz#cc33d24d525e099a5388c0336c6e32b9160609e0" - integrity sha1-zDPSTVJeCZpTiMAzbG4yuRYGCeA= + integrity sha512-ALzNPpyNq9AqXMBjeymIjFDAkAFH06mHJH/cSBHAgU0s4vfpBn6b2nf8tiRLvagKD8RbTpq2FKTBg7cl9l3c7Q== path-exists@^2.0.0: version "2.1.0" resolved "https://registry.yarnpkg.com/path-exists/-/path-exists-2.1.0.tgz#0feb6c64f0fc518d9a754dd5efb62c7022761f4b" - integrity sha1-D+tsZPD8UY2adU3V77YscCJ2H0s= + integrity sha512-yTltuKuhtNeFJKa1PiRzfLAU5182q1y4Eb4XCJ3PBqyzEDkAZRzBrKKBct682ls9reBVHf9udYLN5Nd+K1B9BQ== dependencies: pinkie-promise "^2.0.0" path-exists@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/path-exists/-/path-exists-3.0.0.tgz#ce0ebeaa5f78cb18925ea7d810d7b59b010fd515" - integrity sha1-zg6+ql94yxiSXqfYENe1mwEP1RU= + integrity sha512-bpC7GYwiDYQ4wYLe+FA8lhRjhQCMcQGuSgGGqDkg/QerRWw9CmGRT0iSOVRSZJ29NMLZgIzqaljJ63oaL4NIJQ== path-exists@^4.0.0: version "4.0.0" @@ -7435,7 +7537,7 @@ path-exists@^4.0.0: path-is-absolute@^1.0.0: version "1.0.1" resolved "https://registry.yarnpkg.com/path-is-absolute/-/path-is-absolute-1.0.1.tgz#174b9268735534ffbc7ace6bf53a5a9e1b5c5f5f" - integrity sha1-F0uSaHNVNP+8es5r9TpanhtcX18= + integrity sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg== path-key@^3.0.0, path-key@^3.1.0: version "3.1.1" @@ -7450,19 +7552,19 @@ path-parse@^1.0.7: path-root-regex@^0.1.0: version "0.1.2" resolved "https://registry.yarnpkg.com/path-root-regex/-/path-root-regex-0.1.2.tgz#bfccdc8df5b12dc52c8b43ec38d18d72c04ba96d" - integrity sha1-v8zcjfWxLcUsi0PsONGNcsBLqW0= + integrity sha512-4GlJ6rZDhQZFE0DPVKh0e9jmZ5egZfxTkp7bcRDuPlJXbAwhxcl2dINPUAsjLdejqaLsCeg8axcLjIbvBjN4pQ== path-root@^0.1.1: version "0.1.1" resolved "https://registry.yarnpkg.com/path-root/-/path-root-0.1.1.tgz#9a4a6814cac1c0cd73360a95f32083c8ea4745b7" - integrity sha1-mkpoFMrBwM1zNgqV8yCDyOpHRbc= + integrity sha512-QLcPegTHF11axjfojBIoDygmS2E3Lf+8+jI6wOVmNVenrKSo3mFdSGiIgdSHenczw3wPtlVMQaFVwGmM7BJdtg== dependencies: path-root-regex "^0.1.0" path-type@^1.0.0: version "1.1.0" resolved "https://registry.yarnpkg.com/path-type/-/path-type-1.1.0.tgz#59c44f7ee491da704da415da5a4070ba4f8fe441" - integrity sha1-WcRPfuSR2nBNpBXaWkBwuk+P5EE= + integrity 
sha512-S4eENJz1pkiQn9Znv33Q+deTOKmbl+jj1Fl+qiP/vYezj+S8x+J3Uo0ISrx/QoEvIlOaDWJhPaRd1flJ9HXZqg== dependencies: graceful-fs "^4.1.2" pify "^2.0.0" @@ -7490,7 +7592,7 @@ picocolors@^1.0.0: resolved "https://registry.yarnpkg.com/picocolors/-/picocolors-1.0.0.tgz#cb5bdc74ff3f51892236eaf79d68bc44564ab81c" integrity sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ== -picomatch@^2.0.4, picomatch@^2.2.2, picomatch@^2.2.3, picomatch@^2.3.1: +picomatch@^2.0.4, picomatch@^2.2.1, picomatch@^2.2.2, picomatch@^2.2.3, picomatch@^2.3.1: version "2.3.1" resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.3.1.tgz#3ba3833733646d9d3e4995946c1365a67fb07a42" integrity sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA== @@ -7498,12 +7600,12 @@ picomatch@^2.0.4, picomatch@^2.2.2, picomatch@^2.2.3, picomatch@^2.3.1: pify@^2.0.0, pify@^2.3.0: version "2.3.0" resolved "https://registry.yarnpkg.com/pify/-/pify-2.3.0.tgz#ed141a6ac043a849ea588498e7dca8b15330e90c" - integrity sha1-7RQaasBDqEnqWISY59yosVMw6Qw= + integrity sha512-udgsAY+fTnvv7kI7aaxbqwWNb0AHiB0qBO89PZKPkoTmGOgdbrHDKD+0B2X4uTfJ/FT1R09r9gTsjUjNJotuog== pify@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/pify/-/pify-3.0.0.tgz#e5a4acd2c101fdf3d9a4d07f0dbc4db49dd28176" - integrity sha1-5aSs0sEB/fPZpNB/DbxNtJ3SgXY= + integrity sha512-C3FsVNH1udSEX48gGX1xfvwTWfsYWj5U+8/uK15BGzIGrKoUpghX8hWZwa/OFnakBiiVNmBvemTJR5mcy7iPcg== pify@^4.0.1: version "4.0.1" @@ -7518,14 +7620,14 @@ pify@^5.0.0: pinkie-promise@^2.0.0: version "2.0.1" resolved "https://registry.yarnpkg.com/pinkie-promise/-/pinkie-promise-2.0.1.tgz#2135d6dfa7a358c069ac9b178776288228450ffa" - integrity sha1-ITXW36ejWMBprJsXh3YogihFD/o= + integrity sha512-0Gni6D4UcLTbv9c57DfxDGdr41XfgUjqWZu492f0cIGr16zDU06BWP/RAEvOuo7CQ0CNjHaLlM59YJJFm3NWlw== dependencies: pinkie "^2.0.0" pinkie@^2.0.0: version "2.0.4" resolved "https://registry.yarnpkg.com/pinkie/-/pinkie-2.0.4.tgz#72556b80cfa0d48a974e80e77248e80ed4f7f870" - integrity sha1-clVrgM+g1IqXToDnckjoDtT3+HA= + integrity sha512-MnUuEycAemtSaeFSjXKW/aroV7akBbY+Sv+RkyqFjgAe73F+MR0TBWKBRDkmfWq/HiFmdavfZ1G7h4SPZXaCSg== pirates@^4.0.4: version "4.0.5" @@ -7562,7 +7664,7 @@ pluralize@^8.0.0: posix-character-classes@^0.1.0: version "0.1.1" resolved "https://registry.yarnpkg.com/posix-character-classes/-/posix-character-classes-0.1.1.tgz#01eac0fe3b5af71a2a6c02feabb8c1fef7e00eab" - integrity sha1-AerA/jta9xoqbAL+q7jB/vfgDqs= + integrity sha512-xTgYBc3fuo7Yt7JbiuFxSYGToMoz8fLoE6TC9Wx1P/u+LfeThMOAqmuyECnlBaaJb+u1m9hHiXUEtwW4OzfUJg== postcss@^7.0.16: version "7.0.39" @@ -7577,21 +7679,12 @@ prelude-ls@^1.2.1: resolved "https://registry.yarnpkg.com/prelude-ls/-/prelude-ls-1.2.1.tgz#debc6489d7a6e6b0e7611888cec880337d316396" integrity sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g== -pretty-format@^27.0.0, pretty-format@^27.5.1: - version "27.5.1" - resolved "https://registry.yarnpkg.com/pretty-format/-/pretty-format-27.5.1.tgz#2181879fdea51a7a5851fb39d920faa63f01d88e" - integrity sha512-Qb1gy5OrP5+zDf2Bvnzdl3jsTf1qXVMazbvCoKhtKqVs4/YK4ozX4gKQJJVyNe+cajNPn0KoC0MC3FUmaHWEmQ== - dependencies: - ansi-regex "^5.0.1" - ansi-styles "^5.0.0" - react-is "^17.0.1" - -pretty-format@^28.1.0: - version "28.1.0" - resolved "https://registry.yarnpkg.com/pretty-format/-/pretty-format-28.1.0.tgz#8f5836c6a0dfdb834730577ec18029052191af55" - integrity 
sha512-79Z4wWOYCdvQkEoEuSlBhHJqWeZ8D8YRPiPctJFCtvuaClGpiwiQYSCUOE6IEKUbbFukKOTFIUAXE8N4EQTo1Q== +pretty-format@^28.0.0, pretty-format@^28.1.3: + version "28.1.3" + resolved "https://registry.yarnpkg.com/pretty-format/-/pretty-format-28.1.3.tgz#c9fba8cedf99ce50963a11b27d982a9ae90970d5" + integrity sha512-8gFb/To0OmxHR9+ZTb14Df2vNxdGCX8g1xWGUTqUw5TiZvcQf5sHKObd5UcPyLLyowNwDAMTF3XWOG1B6mxl1Q== dependencies: - "@jest/schemas" "^28.0.2" + "@jest/schemas" "^28.1.3" ansi-regex "^5.0.1" ansi-styles "^5.0.0" react-is "^18.0.0" @@ -7599,9 +7692,9 @@ pretty-format@^28.1.0: pretty-hrtime@^1.0.0: version "1.0.3" resolved "https://registry.yarnpkg.com/pretty-hrtime/-/pretty-hrtime-1.0.3.tgz#b7e3ea42435a4c9b2759d99e0f201eb195802ee1" - integrity sha1-t+PqQkNaTJsnWdmeDyAesZWALuE= + integrity sha512-66hKPCr+72mlfiSjlEB1+45IjXSqvVAIy6mocupoww4tBFE9R9IhwwUGoI4G++Tc9Aq+2rxOt0RFU6gPcrte0A== -proc-log@^2.0.0: +proc-log@^2.0.0, proc-log@^2.0.1: version "2.0.1" resolved "https://registry.yarnpkg.com/proc-log/-/proc-log-2.0.1.tgz#8f3f69a1f608de27878f91f5c688b225391cb685" integrity sha512-Kcmo2FhfDTXdcbfDH76N7uBYHINxc/8GW7UAVuVP9I+Va3uHSerrnKV6dLooga/gh7GlgzuCCr/eoldnL1muGw== @@ -7624,7 +7717,7 @@ promise-call-limit@^1.0.1: promise-inflight@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/promise-inflight/-/promise-inflight-1.0.1.tgz#98472870bf228132fcbdd868129bad12c3c029e3" - integrity sha1-mEcocL8igTL8vdhoEputEsPAKeM= + integrity sha512-6zWPyEOFaQBJYcGMHBKTKJ3u6TBsnMFOIZSa6ce1e/ZrrsOlnHRHbabMjLiBYKp+n44X9eUI6VUPaukCXHuG4g== promise-retry@^2.0.1: version "2.0.1" @@ -7652,24 +7745,24 @@ prompts@^2.0.1: promzard@^0.3.0: version "0.3.0" resolved "https://registry.yarnpkg.com/promzard/-/promzard-0.3.0.tgz#26a5d6ee8c7dee4cb12208305acfb93ba382a9ee" - integrity sha1-JqXW7ox97kyxIggwWs+5O6OCqe4= + integrity sha512-JZeYqd7UAcHCwI+sTOeUDYkvEU+1bQ7iE0UT1MgB/tERkAPkesW46MrpIySzODi+owTjZtiF8Ay5j9m60KmMBw== dependencies: read "1" proto-list@~1.2.1: version "1.2.4" resolved "https://registry.yarnpkg.com/proto-list/-/proto-list-1.2.4.tgz#212d5bfe1318306a420f6402b8e26ff39647a849" - integrity sha1-IS1b/hMYMGpCD2QCuOJv85ZHqEk= + integrity sha512-vtK/94akxsTMhe0/cbfpR+syPuszcuwhqVjJq26CuNDgFGj682oRBXOP5MJpv2r7JtE8MsiepGIqvvOTBwn2vA== -protocols@^1.1.0, protocols@^1.4.0: - version "1.4.8" - resolved "https://registry.yarnpkg.com/protocols/-/protocols-1.4.8.tgz#48eea2d8f58d9644a4a32caae5d5db290a075ce8" - integrity sha512-IgjKyaUSjsROSO8/D49Ab7hP8mJgTYcqApOqdPhLoPxAplXmkp+zRvsrSQjFn5by0rhm4VH0GAUELIPpx7B1yg== +protocols@^2.0.0, protocols@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/protocols/-/protocols-2.0.1.tgz#8f155da3fc0f32644e83c5782c8e8212ccf70a86" + integrity sha512-/XJ368cyBJ7fzLMwLKv1e4vLxOju2MNAIokcr7meSaNcVbWz/CPcW22cP04mwxOErdA5mwjA8Q6w/cdAQxVn7Q== prr@~1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/prr/-/prr-1.0.1.tgz#d3fc114ba06995a45ec6893f484ceb1d78f5f476" - integrity sha1-0/wRS6BplaRexok/SEzrHXj19HY= + integrity sha512-yPw4Sng1gWghHQWj0B3ZggWUm4qVbPwPFcRG8KyxiU7J2OHFSoEHKS+EZ3fv5l1t9CyCiop6l/ZYeWbrgoQejw== pump@^2.0.0: version "2.0.1" @@ -7696,24 +7789,7 @@ punycode@^2.1.0, punycode@^2.1.1: q@^1.5.1: version "1.5.1" resolved "https://registry.yarnpkg.com/q/-/q-1.5.1.tgz#7e32f75b41381291d04611f1bf14109ac00651d7" - integrity sha1-fjL3W0E4EpHQRhHxvxQQmsAGUdc= - -qs@^6.9.4: - version "6.10.3" - resolved "https://registry.yarnpkg.com/qs/-/qs-6.10.3.tgz#d6cde1b2ffca87b5aa57889816c5f81535e22e8e" - integrity 
sha512-wr7M2E0OFRfIfJZjKGieI8lBKb7fRCH4Fv5KNPEs7gJ8jadvotdsS08PzOKR7opXhZ/Xkjtt3WF9g38drmyRqQ== - dependencies: - side-channel "^1.0.4" - -query-string@^6.13.8: - version "6.14.1" - resolved "https://registry.yarnpkg.com/query-string/-/query-string-6.14.1.tgz#7ac2dca46da7f309449ba0f86b1fd28255b0c86a" - integrity sha512-XDxAeVmpfu1/6IjyT/gXHOl+S0vQ9owggJ30hhWKdHAsNPOcasn5o9BW0eejZqL2e4vMjhAxoW3jVHcD6mbcYw== - dependencies: - decode-uri-component "^0.2.0" - filter-obj "^1.1.0" - split-on-first "^1.0.0" - strict-uri-encode "^2.0.0" + integrity sha512-kV/CThkXo6xyFEZUugw/+pIOywXcDbFYgSct5cT3gqlbkBE1SJdwy6UQoZvodiWF/ckQLZyDE/Bu1M6gVu5lVw== queue-microtask@^1.2.2: version "1.2.3" @@ -7746,20 +7822,10 @@ randombytes@^2.1.0: dependencies: safe-buffer "^5.1.0" -react-is@^17.0.1: - version "17.0.2" - resolved "https://registry.yarnpkg.com/react-is/-/react-is-17.0.2.tgz#e691d4a8e9c789365655539ab372762b0efb54f0" - integrity sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w== - react-is@^18.0.0: - version "18.1.0" - resolved "https://registry.yarnpkg.com/react-is/-/react-is-18.1.0.tgz#61aaed3096d30eacf2a2127118b5b41387d32a67" - integrity sha512-Fl7FuabXsJnV5Q1qIOQwx/sagGF18kogb4gpfcG4gjLBWO0WDiiz1ko/ExayuxE7InyQkBLkxRFG5oxY6Uu3Kg== - -read-cmd-shim@^2.0.0: - version "2.0.0" - resolved "https://registry.yarnpkg.com/read-cmd-shim/-/read-cmd-shim-2.0.0.tgz#4a50a71d6f0965364938e9038476f7eede3928d9" - integrity sha512-HJpV9bQpkl6KwjxlJcBoqu9Ba0PQg8TqSNIOrulGt54a0uup0HtevreFHzYzkm0lpnleRdNBzXznKrgxglEHQw== + version "18.2.0" + resolved "https://registry.yarnpkg.com/react-is/-/react-is-18.2.0.tgz#199431eeaaa2e09f86427efbb4f1473edb47609b" + integrity sha512-xWGDIW6x921xtzPkhiULtthJHoJvBbF3q26fzloPCK0hsvxtPVelvftw3zjbHWSkR2km9Z+4uxbDDK/6Zw9B8w== read-cmd-shim@^3.0.0: version "3.0.0" @@ -7774,27 +7840,7 @@ read-package-json-fast@^2.0.2, read-package-json-fast@^2.0.3: json-parse-even-better-errors "^2.3.0" npm-normalize-package-bin "^1.0.1" -read-package-json@^3.0.0: - version "3.0.1" - resolved "https://registry.yarnpkg.com/read-package-json/-/read-package-json-3.0.1.tgz#c7108f0b9390257b08c21e3004d2404c806744b9" - integrity sha512-aLcPqxovhJTVJcsnROuuzQvv6oziQx4zd3JvG0vGCL5MjTONUc4uJ90zCBC6R7W7oUKBNoR/F8pkyfVwlbxqng== - dependencies: - glob "^7.1.1" - json-parse-even-better-errors "^2.3.0" - normalize-package-data "^3.0.0" - npm-normalize-package-bin "^1.0.0" - -read-package-json@^4.1.1: - version "4.1.2" - resolved "https://registry.yarnpkg.com/read-package-json/-/read-package-json-4.1.2.tgz#b444d047de7c75d4a160cb056d00c0693c1df703" - integrity sha512-Dqer4pqzamDE2O4M55xp1qZMuLPqi4ldk2ya648FOMHRjwMzFhuxVrG04wd0c38IsvkVdr3vgHI6z+QTPdAjrQ== - dependencies: - glob "^7.1.1" - json-parse-even-better-errors "^2.3.0" - normalize-package-data "^3.0.0" - npm-normalize-package-bin "^1.0.0" - -read-package-json@^5.0.0: +read-package-json@^5.0.0, read-package-json@^5.0.1: version "5.0.1" resolved "https://registry.yarnpkg.com/read-package-json/-/read-package-json-5.0.1.tgz#1ed685d95ce258954596b13e2e0e76c7d0ab4c26" integrity sha512-MALHuNgYWdGW3gKzuNMuYtcSSZbGQm94fAp16xt8VsYTLBjUSc55bLMKe6gzpWue0Tfi6CBgwCSdDAqutGDhMg== @@ -7807,7 +7853,7 @@ read-package-json@^5.0.0: read-pkg-up@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/read-pkg-up/-/read-pkg-up-1.0.1.tgz#9d63c13276c065918d57f002a57f40a1b643fb02" - integrity sha1-nWPBMnbAZZGNV/ACpX9AobZD+wI= + integrity sha512-WD9MTlNtI55IwYUS27iHh9tK3YoIVhxis8yKhLpTqWtml739uXc9NWTpxoHkfZf3+DkCCsXox94/VWZniuZm6A== dependencies: 
find-up "^1.0.0" read-pkg "^1.0.0" @@ -7815,7 +7861,7 @@ read-pkg-up@^1.0.1: read-pkg-up@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/read-pkg-up/-/read-pkg-up-3.0.0.tgz#3ed496685dba0f8fe118d0691dc51f4a1ff96f07" - integrity sha1-PtSWaF26D4/hGNBpHcUfSh/5bwc= + integrity sha512-YFzFrVvpC6frF1sz8psoHDBGF7fLPc+llq/8NB43oagqWkx8ar5zYtsTORtOjw9W2RHLpWP+zTWwBvf1bCmcSw== dependencies: find-up "^2.0.0" read-pkg "^3.0.0" @@ -7841,7 +7887,7 @@ read-pkg-up@^8.0.0: read-pkg@^1.0.0: version "1.1.0" resolved "https://registry.yarnpkg.com/read-pkg/-/read-pkg-1.1.0.tgz#f5ffaa5ecd29cb31c0474bca7d756b6bb29e3f28" - integrity sha1-9f+qXs0pyzHAR0vKfXVra7KePyg= + integrity sha512-7BGwRHqt4s/uVbuyoeejRn4YmFnYZiFl4AuaeXHlgZf3sONF0SOGlxs2Pw8g6hCKupo08RafIO5YXFNOKTfwsQ== dependencies: load-json-file "^1.0.0" normalize-package-data "^2.3.2" @@ -7850,7 +7896,7 @@ read-pkg@^1.0.0: read-pkg@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/read-pkg/-/read-pkg-3.0.0.tgz#9cbc686978fee65d16c00e2b19c237fcf6e38389" - integrity sha1-nLxoaXj+5l0WwA4rGcI3/Pbjg4k= + integrity sha512-BLq/cCO9two+lBgiTYNqD6GdtK8s4NpaWrl6/rCO9w0TUS8oJl7cmToOZfRYllKTISY6nt1U7jQ53brmKqY6BA== dependencies: load-json-file "^4.0.0" normalize-package-data "^2.3.2" @@ -7876,14 +7922,14 @@ read-pkg@^6.0.0: parse-json "^5.2.0" type-fest "^1.0.1" -read@1, read@~1.0.1: +read@1, read@^1.0.7: version "1.0.7" resolved "https://registry.yarnpkg.com/read/-/read-1.0.7.tgz#b3da19bd052431a97671d44a42634adf710b40c4" - integrity sha1-s9oZvQUkMal2cdRKQmNK33ELQMQ= + integrity sha512-rSOKNYUmaxy0om1BNjMN4ezNT6VKK+2xF4GBhc81mkH7L60i6dp8qPYrkndNLT3QPphoII3maL9PVC9XmhHwVQ== dependencies: mute-stream "~0.0.4" -"readable-stream@2 || 3", readable-stream@3, readable-stream@^3.0.0, readable-stream@^3.0.2, readable-stream@^3.6.0: +"readable-stream@2 || 3", readable-stream@3, readable-stream@^3.0.0, readable-stream@^3.0.2, readable-stream@^3.1.1, readable-stream@^3.4.0, readable-stream@^3.6.0: version "3.6.0" resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.6.0.tgz#337bbda3adc0706bd3e024426a286d4b4b2c9198" integrity sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA== @@ -7892,7 +7938,7 @@ read@1, read@~1.0.1: string_decoder "^1.1.1" util-deprecate "^1.0.1" -readable-stream@^2.0.0, readable-stream@^2.0.1, readable-stream@^2.0.2, readable-stream@^2.0.5, readable-stream@^2.0.6, readable-stream@^2.1.5, readable-stream@^2.2.2, readable-stream@^2.3.3, readable-stream@^2.3.5, readable-stream@^2.3.6, readable-stream@~2.3.6: +readable-stream@^2.0.0, readable-stream@^2.0.1, readable-stream@^2.0.2, readable-stream@^2.0.5, readable-stream@^2.1.5, readable-stream@^2.2.2, readable-stream@^2.3.3, readable-stream@^2.3.5, readable-stream@^2.3.6, readable-stream@~2.3.6: version "2.3.7" resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-2.3.7.tgz#1eca1cf711aef814c04f62252a36a62f6cb23b57" integrity sha512-Ebho8K4jIbHAxnuxi7o42OrZgF/ZTNcsZj6nRKyUmkhLFq8CHItp/fy6hQZuZmP/n3yZ9VBUbp4zz/mX8hmYPw== @@ -7924,10 +7970,17 @@ readdirp@^2.2.1: micromatch "^3.1.10" readable-stream "^2.0.2" +readdirp@~3.6.0: + version "3.6.0" + resolved "https://registry.yarnpkg.com/readdirp/-/readdirp-3.6.0.tgz#74a370bd857116e245b29cc97340cd431a02a6c7" + integrity sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA== + dependencies: + picomatch "^2.2.1" + rechoir@^0.6.2: version "0.6.2" resolved 
"https://registry.yarnpkg.com/rechoir/-/rechoir-0.6.2.tgz#85204b54dba82d5742e28c96756ef43af50e3384" - integrity sha1-hSBLVNuoLVdC4oyWdW70OvUOM4Q= + integrity sha512-HFM8rkZ+i3zrV+4LQjwQ0W+ez98pApMGM3HUrN04j3CqzPOzl9nmP15Y8YXNm8QHGv/eacOVEjqhmWpkRV0NAw== dependencies: resolve "^1.1.6" @@ -7981,7 +8034,7 @@ remove-bom-buffer@^3.0.0: remove-bom-stream@^1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/remove-bom-stream/-/remove-bom-stream-1.2.0.tgz#05f1a593f16e42e1fb90ebf59de8e569525f9523" - integrity sha1-BfGlk/FuQuH7kOv1nejlaVJflSM= + integrity sha512-wigO8/O08XHb8YPzpDDT+QmRANfW6vLqxfaXm1YXhnFf3AkSLyjfG3GEFg4McZkmgL7KvCj5u2KczkvSP6NfHA== dependencies: remove-bom-buffer "^3.0.0" safe-buffer "^5.1.0" @@ -7990,7 +8043,7 @@ remove-bom-stream@^1.2.0: remove-trailing-separator@^1.0.1, remove-trailing-separator@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/remove-trailing-separator/-/remove-trailing-separator-1.1.0.tgz#c24bce2a283adad5bc3f58e0d48249b92379d8ef" - integrity sha1-wkvOKig62tW8P1jg1IJJuSN52O8= + integrity sha512-/hS+Y0u3aOfIETiaiirUFwDBDzmXPvO+jAfKTitUngIPzdKc6Z0LoFjM/CK5PL4C+eKwHohlHAb6H0VFfmmUsw== repeat-element@^1.1.2: version "1.1.4" @@ -8000,7 +8053,7 @@ repeat-element@^1.1.2: repeat-string@^1.5.4, repeat-string@^1.6.1: version "1.6.1" resolved "https://registry.yarnpkg.com/repeat-string/-/repeat-string-1.6.1.tgz#8dcae470e1c88abc2d600fff4a776286da75e637" - integrity sha1-jcrkcOHIirwtYA//Sndihtp15jc= + integrity sha512-PV0dzCYDNfRi1jCDbJzpW7jNNDRuCOG/jI5ctQcGKt/clZD+YcPS3yIlWuTJMmESC8aevCFmWJy5wjAFgNqN6w== replace-ext@^1.0.0: version "1.0.1" @@ -8010,7 +8063,7 @@ replace-ext@^1.0.0: replace-homedir@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/replace-homedir/-/replace-homedir-1.0.0.tgz#e87f6d513b928dde808260c12be7fec6ff6e798c" - integrity sha1-6H9tUTuSjd6AgmDBK+f+xv9ueYw= + integrity sha512-CHPV/GAglbIB1tnQgaiysb8H2yCy8WQ7lcEwQ/eT+kLj0QHV8LnJW0zpqpE7RSkrMSRoa+EBoag86clf7WAgSg== dependencies: homedir-polyfill "^1.0.1" is-absolute "^1.0.0" @@ -8028,12 +8081,12 @@ replacestream@^4.0.3: require-directory@^2.1.1: version "2.1.1" resolved "https://registry.yarnpkg.com/require-directory/-/require-directory-2.1.1.tgz#8c64ad5fd30dab1c976e2344ffe7f792a6a6df42" - integrity sha1-jGStX9MNqxyXbiNE/+f3kqam30I= + integrity sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q== require-main-filename@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/require-main-filename/-/require-main-filename-1.0.1.tgz#97f717b69d48784f5f526a6c5aa8ffdda055a4d1" - integrity sha1-l/cXtp1IeE9fUmpsWqj/3aBVpNE= + integrity sha512-IqSUtOVP4ksd1C/ej5zeEh/BIP2ajqpn8c5x+q99gvcIG/Qf0cud5raVnE/Dwd0ua9TXYDoDc0RE5hBSdz22Ug== resolve-cwd@^3.0.0: version "3.0.0" @@ -8045,7 +8098,7 @@ resolve-cwd@^3.0.0: resolve-dir@^1.0.0, resolve-dir@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/resolve-dir/-/resolve-dir-1.0.1.tgz#79a40644c362be82f26effe739c9bb5382046f43" - integrity sha1-eaQGRMNivoLybv/nOcm7U4IEb0M= + integrity sha512-R7uiTjECzvOsWSfdM0QKFNBVFcK27aHOUwdvK53BcW8zqnGdYp0Fbj82cy54+2A4P2tFM22J5kRfe1R+lM/1yg== dependencies: expand-tilde "^2.0.0" global-modules "^1.0.0" @@ -8063,14 +8116,14 @@ resolve-from@^5.0.0: resolve-options@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/resolve-options/-/resolve-options-1.1.0.tgz#32bb9e39c06d67338dc9378c0d6d6074566ad131" - integrity sha1-MrueOcBtZzONyTeMDW1gdFZq0TE= + integrity 
sha512-NYDgziiroVeDC29xq7bp/CacZERYsA9bXYd1ZmcJlF3BcrZv5pTb4NG7SjdyKDnXZ84aC4vo2u6sNKIA1LCu/A== dependencies: value-or-function "^3.0.0" resolve-url@^0.2.1: version "0.2.1" resolved "https://registry.yarnpkg.com/resolve-url/-/resolve-url-0.2.1.tgz#2c637fe77c893afd2a663fe21aa9080068e2052a" - integrity sha1-LGN/53yJOv0qZj/iGqkIAGjiBSo= + integrity sha512-ZuF55hVUQaaczgOIwqWzkEcEidmlD/xl44x1UZnhOXcYuFN2S6+rcxpG+C1N3So0wvNI3DmJICUFfu2SxhBmvg== resolve.exports@^1.1.0: version "1.1.0" @@ -8078,11 +8131,11 @@ resolve.exports@^1.1.0: integrity sha512-J1l+Zxxp4XK3LUDZ9m60LRJF/mAe4z6a4xyabPHk7pvK5t35dACV32iIjJDFeWZFfZlO29w6SZ67knR0tHzJtQ== resolve@^1.1.6, resolve@^1.1.7, resolve@^1.10.0, resolve@^1.19.0, resolve@^1.20.0, resolve@^1.4.0: - version "1.22.0" - resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.22.0.tgz#5e0b8c67c15df57a89bdbabe603a002f21731198" - integrity sha512-Hhtrw0nLeSrFQ7phPp4OOcVjLPIeMnRlr5mcnVuMe7M/7eBn98A3hmFRLoFo3DLZkivSYwhRUJTyPyWAk56WLw== + version "1.22.1" + resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.22.1.tgz#27cb2ebb53f91abb49470a928bba7558066ac177" + integrity sha512-nBpuuYuY5jFsli/JIs1oldw6fOQCBioohqWZg/2hiaOybXOft4lonv85uDOKXdf8rhyK159cxU5cDcK/NKk8zw== dependencies: - is-core-module "^2.8.1" + is-core-module "^2.9.0" path-parse "^1.0.7" supports-preserve-symlinks-flag "^1.0.0" @@ -8102,7 +8155,7 @@ ret@~0.1.10: retry@^0.12.0: version "0.12.0" resolved "https://registry.yarnpkg.com/retry/-/retry-0.12.0.tgz#1b42a6266a21f07421d1b0b54b7dc167b01c013b" - integrity sha1-G0KmJmoh8HQh0bC1S33BZ7AcATs= + integrity sha512-9LkiTwjUh6rT555DtE9rTX+BKByPfrMzEAtnlEtdEwr3Nkffwiihqe2bWADg+OQRjt9gl6ICdmB/ZFDCGAtSow== reusify@^1.0.4: version "1.0.4" @@ -8116,10 +8169,10 @@ rimraf@^3.0.0, rimraf@^3.0.2: dependencies: glob "^7.1.3" -rollup@2.75.4: - version "2.75.4" - resolved "https://registry.yarnpkg.com/rollup/-/rollup-2.75.4.tgz#c3518c326c98e508b628a93015a03a276c331f22" - integrity sha512-JgZiJMJkKImMZJ8ZY1zU80Z2bA/TvrL/7D9qcBCrfl2bP+HUaIw0QHUroB4E3gBpFl6CRFM1YxGbuYGtdAswbQ== +rollup@2.77.2: + version "2.77.2" + resolved "https://registry.yarnpkg.com/rollup/-/rollup-2.77.2.tgz#6b6075c55f9cc2040a5912e6e062151e42e2c4e3" + integrity sha512-m/4YzYgLcpMQbxX3NmAqDvwLATZzxt8bIegO78FZLl+lAgKJBd1DRAOeEiZcKOIOPjxE6ewHWHNgGEalFXuz1g== optionalDependencies: fsevents "~2.3.2" @@ -8135,20 +8188,13 @@ run-parallel@^1.1.9: dependencies: queue-microtask "^1.2.2" -rxjs@7.5.5: - version "7.5.5" - resolved "https://registry.yarnpkg.com/rxjs/-/rxjs-7.5.5.tgz#2ebad89af0f560f460ad5cc4213219e1f7dd4e9f" - integrity sha512-sy+H0pQofO95VDmFLzyaw9xNJU4KTRSwQIGM6+iG3SypAtCiLDzpeG8sJrNCWn2Up9km+KhkvTdbkrdy+yzZdw== +rxjs@7.5.6, rxjs@^7.5.5: + version "7.5.6" + resolved "https://registry.yarnpkg.com/rxjs/-/rxjs-7.5.6.tgz#0446577557862afd6903517ce7cae79ecb9662bc" + integrity sha512-dnyv2/YsXhnm461G+R/Pe5bWP41Nm6LBXEYWI6eiFP4fiwx6WRI/CD0zbdVAudd9xwLEF2IDcKXLHit0FYjUzw== dependencies: tslib "^2.1.0" -rxjs@^6.6.0: - version "6.6.7" - resolved "https://registry.yarnpkg.com/rxjs/-/rxjs-6.6.7.tgz#90ac018acabf491bf65044235d5863c4dab804c9" - integrity sha512-hTdwr+7yYNIT5n4AMYp85KA6yw2Va0FLa3Rguvbpa4W3I5xynaBZo41cM3XM+4Q6fRMj3sBYIR1VAmZMXYJvRQ== - dependencies: - tslib "^1.9.0" - safe-buffer@^5.1.0, safe-buffer@^5.1.1, safe-buffer@~5.2.0: version "5.2.1" resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6" @@ -8162,7 +8208,7 @@ safe-buffer@~5.1.0, safe-buffer@~5.1.1: safe-regex@^1.1.0: version "1.1.0" resolved 
"https://registry.yarnpkg.com/safe-regex/-/safe-regex-1.1.0.tgz#40a3669f3b077d1e943d44629e157dd48023bf2e" - integrity sha1-QKNmnzsHfR6UPURinhV91IAjvy4= + integrity sha512-aJXcif4xnaNUzvUuC5gcb46oTS7zvg4jpMTnuqtrEPlR3vFr4pxtdTwaF1Qs3Enjn9HK+ZlwQui+a7z0SywIzg== dependencies: ret "~0.1.10" @@ -8195,7 +8241,7 @@ schema-utils@^3.1.0, schema-utils@^3.1.1: semver-greatest-satisfied-range@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/semver-greatest-satisfied-range/-/semver-greatest-satisfied-range-1.1.0.tgz#13e8c2658ab9691cb0cd71093240280d36f77a5b" - integrity sha1-E+jCZYq5aRywzXEJMkAoDTb3els= + integrity sha512-Ny/iyOzSSa8M5ML46IAx3iXc6tfOsYU2R4AXi2UpHk60Zrgyq6eqPj/xiOfS0rRl/iiQ/rdJkVjw/5cdUyCntQ== dependencies: sver-compat "^1.5.0" @@ -8204,7 +8250,14 @@ semver-greatest-satisfied-range@^1.1.0: resolved "https://registry.yarnpkg.com/semver/-/semver-5.7.1.tgz#a954f931aeba508d307bbf069eff0c01c96116f7" integrity sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ== -semver@7.x, semver@^7.0.0, semver@^7.1.1, semver@^7.1.3, semver@^7.3.4, semver@^7.3.5, semver@^7.3.7: +semver@7.3.4: + version "7.3.4" + resolved "https://registry.yarnpkg.com/semver/-/semver-7.3.4.tgz#27aaa7d2e4ca76452f98d3add093a72c943edc97" + integrity sha512-tCfb2WLjqFAtXn4KEdxIhalnRtoKFN7nAwj0B3ZXCbQloV2tq5eDbcTmT68JJD3nRJq24/XgxtQKFIpQdtvmVw== + dependencies: + lru-cache "^6.0.0" + +semver@7.x, semver@^7.0.0, semver@^7.1.1, semver@^7.3.4, semver@^7.3.5, semver@^7.3.7: version "7.3.7" resolved "https://registry.yarnpkg.com/semver/-/semver-7.3.7.tgz#12c5b649afdbf9049707796e22a4028814ce523f" integrity sha512-QlYTucUYOews+WeEujDoEGziz4K6c47V/Bd+LjSSYcA94p+DmINdf7ncaUinThfvZyu13lN9OY1XDxt8C0Tw0g== @@ -8223,10 +8276,10 @@ serialize-javascript@^6.0.0: dependencies: randombytes "^2.1.0" -set-blocking@^2.0.0, set-blocking@~2.0.0: +set-blocking@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/set-blocking/-/set-blocking-2.0.0.tgz#045f9782d011ae9a6803ddd382b24392b3d890f7" - integrity sha1-BF+XgtARrppoA93TgrJDkrPYkPc= + integrity sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw== set-value@^2.0.0, set-value@^2.0.1: version "2.0.1" @@ -8266,16 +8319,7 @@ shiki@^0.10.1: vscode-oniguruma "^1.6.1" vscode-textmate "5.2.0" -side-channel@^1.0.4: - version "1.0.4" - resolved "https://registry.yarnpkg.com/side-channel/-/side-channel-1.0.4.tgz#efce5c8fdc104ee751b25c58d4290011fa5ea2cf" - integrity sha512-q5XPytqFEIKHkGdiMIrY10mvLRvnQh42/+GoBlFW3b2LXLE2xxJpZFdm94we0BaoV3RwJyGqg5wS7epxTv0Zvw== - dependencies: - call-bind "^1.0.0" - get-intrinsic "^1.0.2" - object-inspect "^1.9.0" - -signal-exit@^3.0.0, signal-exit@^3.0.2, signal-exit@^3.0.3, signal-exit@^3.0.7: +signal-exit@^3.0.2, signal-exit@^3.0.3, signal-exit@^3.0.7: version "3.0.7" resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.7.tgz#a9a1767f8af84155114eaabd73f99273c8f59ad9" integrity sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ== @@ -8299,6 +8343,11 @@ slash@^3.0.0: resolved "https://registry.yarnpkg.com/slash/-/slash-3.0.0.tgz#6539be870c165adbd5240220dbe361f1bc4d4634" integrity sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q== +slash@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/slash/-/slash-4.0.0.tgz#2422372176c4c6c5addb5e2ada885af984b396a7" + integrity 
sha512-3dOsAHXXUkQTpOYcoAxLIorMTp4gIQr5IW3iVb7A7lFIp0VHhnynm9izx6TssdrIcVIESAlVjtnO2K8bg+Coew== + slice-ansi@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/slice-ansi/-/slice-ansi-4.0.0.tgz#500e8dd0fd55b05815086255b3195adf2a45fe6b" @@ -8343,36 +8392,27 @@ snapdragon@^0.8.1: source-map-resolve "^0.5.0" use "^3.1.0" -socks-proxy-agent@^5.0.0: - version "5.0.1" - resolved "https://registry.yarnpkg.com/socks-proxy-agent/-/socks-proxy-agent-5.0.1.tgz#032fb583048a29ebffec2e6a73fca0761f48177e" - integrity sha512-vZdmnjb9a2Tz6WEQVIurybSwElwPxMZaIc7PzqbJTrezcKNznv6giT7J7tZDZ1BojVaa1jvO/UiUdhDVB0ACoQ== - dependencies: - agent-base "^6.0.2" - debug "4" - socks "^2.3.3" - -socks-proxy-agent@^6.0.0, socks-proxy-agent@^6.1.1: - version "6.2.1" - resolved "https://registry.yarnpkg.com/socks-proxy-agent/-/socks-proxy-agent-6.2.1.tgz#2687a31f9d7185e38d530bef1944fe1f1496d6ce" - integrity sha512-a6KW9G+6B3nWZ1yB8G7pJwL3ggLy1uTzKAgCb7ttblwqdz9fMGJUuTy3uFzEP48FAs9FLILlmzDlE2JJhVQaXQ== +socks-proxy-agent@^7.0.0: + version "7.0.0" + resolved "https://registry.yarnpkg.com/socks-proxy-agent/-/socks-proxy-agent-7.0.0.tgz#dc069ecf34436621acb41e3efa66ca1b5fed15b6" + integrity sha512-Fgl0YPZ902wEsAyiQ+idGd1A7rSFx/ayC1CQVMw5P+EQx2V0SgpGtf6OKFhVjPflPUl9YMmEOnmfjCdMUsygww== dependencies: agent-base "^6.0.2" debug "^4.3.3" socks "^2.6.2" -socks@^2.3.3, socks@^2.6.2: - version "2.6.2" - resolved "https://registry.yarnpkg.com/socks/-/socks-2.6.2.tgz#ec042d7960073d40d94268ff3bb727dc685f111a" - integrity sha512-zDZhHhZRY9PxRruRMR7kMhnf3I8hDs4S3f9RecfnGxvcBHQcKcIH/oUcEWffsfl1XxdYlA7nnlGbbTvPz9D8gA== +socks@^2.6.2: + version "2.7.0" + resolved "https://registry.yarnpkg.com/socks/-/socks-2.7.0.tgz#f9225acdb841e874dca25f870e9130990f3913d0" + integrity sha512-scnOe9y4VuiNUULJN72GrM26BNOjVsfPXI+j+98PkyEfsIXroa5ofyjT+FzGvn/xHs73U2JtoBYAVx9Hl4quSA== dependencies: - ip "^1.1.5" + ip "^2.0.0" smart-buffer "^4.2.0" sort-keys@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/sort-keys/-/sort-keys-2.0.0.tgz#658535584861ec97d730d6cf41822e1f56684128" - integrity sha1-ZYU1WEhh7JfXMNbPQYIuH1ZoQSg= + integrity sha512-/dPCrG1s3ePpWm6yBbxZq5Be1dXGLyLn9Z791chDC3NFrpkVbWGzkBwPN1knaciexFXgRJ7hzdnwZ4stHSDmjg== dependencies: is-plain-obj "^1.0.0" @@ -8426,7 +8466,7 @@ source-map-url@^0.4.0: source-map@^0.5.1, source-map@^0.5.6: version "0.5.7" resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.5.7.tgz#8a039d2d1021d22d1ea14c80d8ea468ba2ef3fcc" - integrity sha1-igOdLRAh0i0eoUyA2OpGi6LvP8w= + integrity sha512-LbrmJOMUSdEVxIKvdcJzQC+nQhe8FUZQTXQy6+I75skNgn3OoQ0DZA8YnFa7gp8tqtL3KPf1kmo0R5DoApeSGQ== source-map@^0.6.0, source-map@^0.6.1: version "0.6.1" @@ -8434,9 +8474,9 @@ source-map@^0.6.0, source-map@^0.6.1: integrity sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g== source-map@^0.7.3: - version "0.7.3" - resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.7.3.tgz#5302f8169031735226544092e64981f751750383" - integrity sha512-CkCj6giN3S+n9qrYiBTX5gystlENnRW5jZeNLHpe6aue+SrHcG5VYwujhW9s4dY31mEGsxBDrHR6oI69fTXsaQ== + version "0.7.4" + resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.7.4.tgz#a9bbe705c9d8846f4e08ff6765acf0f1b0898656" + integrity sha512-l3BikUxvPOcn5E74dZiq5BGsTb5yEwhaTSzccU6t4sDOH8NWJCstKO5QT2CvtFoK6F0saL7p9xHAqHOlCPJygA== sparkles@^1.0.0: version "1.0.1" @@ -8469,11 +8509,6 @@ spdx-license-ids@^3.0.0: resolved 
"https://registry.yarnpkg.com/spdx-license-ids/-/spdx-license-ids-3.0.11.tgz#50c0d8c40a14ec1bf449bae69a0ea4685a9d9f95" integrity sha512-Ctl2BrFiM0X3MANYgj3CkygxhRmr9mi6xhejbdO960nF6EDJApTYpn0BQnDKlnNBULKiCN1n3w9EBkHK8ZWg+g== -split-on-first@^1.0.0: - version "1.1.0" - resolved "https://registry.yarnpkg.com/split-on-first/-/split-on-first-1.1.0.tgz#f610afeee3b12bce1d0c30425e76398b78249a5f" - integrity sha512-43ZssAJaMusuKWL8sKUBQXHWOpq8d6CfN/u1p4gUzfJkM05C8rxTmYrkIPTXapZpORA6LkkzcUulJ8FqA7Uudw== - split-string@^3.0.1, split-string@^3.0.2: version "3.1.0" resolved "https://registry.yarnpkg.com/split-string/-/split-string-3.1.0.tgz#7cb09dda3a86585705c64b39a6466038682e8fe2" @@ -8498,16 +8533,9 @@ split@^1.0.0: sprintf-js@~1.0.2: version "1.0.3" resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c" - integrity sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw= - -ssri@^8.0.0, ssri@^8.0.1: - version "8.0.1" - resolved "https://registry.yarnpkg.com/ssri/-/ssri-8.0.1.tgz#638e4e439e2ffbd2cd289776d5ca457c4f51a2af" - integrity sha512-97qShzy1AiyxvPNIkLWoGua7xoQzzPjQ0HAH4B0rWKo7SZ6USuPcrUiAFrws0UH8RrbWmgq3LMTObhPIHbbBeQ== - dependencies: - minipass "^3.1.1" + integrity sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g== -ssri@^9.0.0: +ssri@^9.0.0, ssri@^9.0.1: version "9.0.1" resolved "https://registry.yarnpkg.com/ssri/-/ssri-9.0.1.tgz#544d4c357a8d7b71a19700074b6883fcb4eae057" integrity sha512-o57Wcn66jMQvfHG1FlYbWeZWW/dHZhJXjpIcTfXldXEk5nz5lStPo3mK0OJQfGR3RbZUlbISexbljkJzuEj/8Q== @@ -8517,7 +8545,7 @@ ssri@^9.0.0: stack-trace@0.0.10: version "0.0.10" resolved "https://registry.yarnpkg.com/stack-trace/-/stack-trace-0.0.10.tgz#547c70b347e8d32b4e108ea1a2a159e5fdde19c0" - integrity sha1-VHxws0fo0ytOEI6hoqFZ5f3eGcA= + integrity sha512-KGzahc7puUKkzyMt+IqAep+TVNbKP+k2Lmwhub39m1AsTSkaDutx56aDCo+HLDzf/D26BIHTJWNiTG1KAJiQCg== stack-utils@^2.0.3: version "2.0.5" @@ -8529,12 +8557,12 @@ stack-utils@^2.0.3: static-extend@^0.1.1: version "0.1.2" resolved "https://registry.yarnpkg.com/static-extend/-/static-extend-0.1.2.tgz#60809c39cbff55337226fd5e0b520f341f1fb5c6" - integrity sha1-YICcOcv/VTNyJv1eC1IPNB8ftcY= + integrity sha512-72E9+uLc27Mt718pMHt9VMNiAL4LMsmDbBva8mxWUCkT07fSzEGMYUCk0XWY6lp0j6RBAG4cJ3mWuZv2OE3s0g== dependencies: define-property "^0.2.5" object-copy "^0.1.0" -stream-exhaust@^1.0.1: +stream-exhaust@^1.0.1, stream-exhaust@^1.0.2: version "1.0.2" resolved "https://registry.yarnpkg.com/stream-exhaust/-/stream-exhaust-1.0.2.tgz#acdac8da59ef2bc1e17a2c0ccf6c320d120e555d" integrity sha512-b/qaq/GlBK5xaq1yrK9/zFcyRSTNxmcZwFLGSTG0mXgZl/4Z6GgiyYOXOvY7N3eEvFRAG1bkDRz5EPGSvPYQlw== @@ -8544,11 +8572,6 @@ stream-shift@^1.0.0: resolved "https://registry.yarnpkg.com/stream-shift/-/stream-shift-1.0.1.tgz#d7088281559ab2778424279b0877da3c392d5a3d" integrity sha512-AiisoFqQ0vbGcZgQPY1cdP2I76glaVA/RauYR4G4thNFgkTqr90yXTo4LYX60Jl+sIlPNHHdGSwo01AvbKUSVQ== -strict-uri-encode@^2.0.0: - version "2.0.0" - resolved "https://registry.yarnpkg.com/strict-uri-encode/-/strict-uri-encode-2.0.0.tgz#b9c7330c7042862f6b142dc274bbcc5866ce3546" - integrity sha1-ucczDHBChi9rFC3CdLvMWGbONUY= - string-length@^4.0.1: version "4.0.2" resolved "https://registry.yarnpkg.com/string-length/-/string-length-4.0.2.tgz#a8a8dc7bd5c1a82b9b3c8b87e125f66871b6e57a" @@ -8560,7 +8583,7 @@ string-length@^4.0.1: string-width@^1.0.1, string-width@^1.0.2: version "1.0.2" resolved 
"https://registry.yarnpkg.com/string-width/-/string-width-1.0.2.tgz#118bdf5b8cdc51a2a7e70d211e07e2b0b9b107d3" - integrity sha1-EYvfW4zcUaKn5w0hHgfisLmxB9M= + integrity sha512-0XsVpQLnVCXHJfyEs8tC0zpTVIr5PKKsQtkT29IwupnPTjtPmQ3xT/4yCREF9hYkV/3M3kzcUTSAZT6a6h81tw== dependencies: code-point-at "^1.0.0" is-fullwidth-code-point "^1.0.0" @@ -8592,7 +8615,7 @@ string_decoder@~1.1.1: strip-ansi@^3.0.0, strip-ansi@^3.0.1: version "3.0.1" resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-3.0.1.tgz#6a385fb8853d952d5ff05d0e8aaf94278dc63dcf" - integrity sha1-ajhfuIU9lS1f8F0Oiq+UJ43GPc8= + integrity sha512-VhumSSbBqDTP8p2ZLKj40UjBCV4+v8bUSEpUb4KjRgWk9pbqGF4REFj6KEagidb2f/M6AzC0EmFyDNGaw9OCzg== dependencies: ansi-regex "^2.0.0" @@ -8606,19 +8629,19 @@ strip-ansi@^6.0.0, strip-ansi@^6.0.1: strip-bom-string@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/strip-bom-string/-/strip-bom-string-1.0.0.tgz#e5211e9224369fbb81d633a2f00044dc8cedad92" - integrity sha1-5SEekiQ2n7uB1jOi8ABE3IztrZI= + integrity sha512-uCC2VHvQRYu+lMh4My/sFNmF2klFymLX1wHJeXnbEJERpV/ZsVuonzerjfrGpIGF7LBVa1O7i9kjiWvJiFck8g== strip-bom@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/strip-bom/-/strip-bom-2.0.0.tgz#6219a85616520491f35788bdbf1447a99c7e6b0e" - integrity sha1-YhmoVhZSBJHzV4i9vxRHqZx+aw4= + integrity sha512-kwrX1y7czp1E69n2ajbG65mIo9dqvJ+8aBQXOGVxqwvNbsXdFM6Lq37dLAY3mknUwru8CfcCbfOLL/gMo+fi3g== dependencies: is-utf8 "^0.2.0" strip-bom@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/strip-bom/-/strip-bom-3.0.0.tgz#2334c18e9c759f7bdd56fdef7e9ae3d588e68ed3" - integrity sha1-IzTBjpx1n3vdVv3vfprj1YjmjtM= + integrity sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA== strip-bom@^4.0.0: version "4.0.0" @@ -8695,7 +8718,7 @@ supports-preserve-symlinks-flag@^1.0.0: sver-compat@^1.5.0: version "1.5.0" resolved "https://registry.yarnpkg.com/sver-compat/-/sver-compat-1.5.0.tgz#3cf87dfeb4d07b4a3f14827bc186b3fd0c645cd8" - integrity sha1-PPh9/rTQe0o/FIJ7wYaz/QxkXNg= + integrity sha512-aFTHfmjwizMNlNE6dsGmoAM4lHjL0CyiobWaFiXWSlD7cIxshW422Nb8KbXCmR6z+0ZEPY+daXJrDyh/vuwTyg== dependencies: es6-iterator "^2.0.1" es6-symbol "^3.1.1" @@ -8715,7 +8738,18 @@ tapable@^2.1.1, tapable@^2.2.0: resolved "https://registry.yarnpkg.com/tapable/-/tapable-2.2.1.tgz#1967a73ef4060a82f12ab96af86d52fdb76eeca0" integrity sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ== -tar@^6.0.2, tar@^6.1.0, tar@^6.1.11, tar@^6.1.2: +tar-stream@~2.2.0: + version "2.2.0" + resolved "https://registry.yarnpkg.com/tar-stream/-/tar-stream-2.2.0.tgz#acad84c284136b060dc3faa64474aa9aebd77287" + integrity sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ== + dependencies: + bl "^4.0.3" + end-of-stream "^1.4.1" + fs-constants "^1.0.0" + inherits "^2.0.3" + readable-stream "^3.1.1" + +tar@^6.1.0, tar@^6.1.11, tar@^6.1.2: version "6.1.11" resolved "https://registry.yarnpkg.com/tar/-/tar-6.1.11.tgz#6760a38f003afa1b2ffd0ffe9e9abbd0eab3d621" integrity sha512-an/KZQzQUkZCkuoAA64hM92X0Urb6VpRhAFllDzz44U2mcD5scmT3zBc4VgVpkugF580+DQn8eAFSyoQt0tznA== @@ -8730,7 +8764,7 @@ tar@^6.0.2, tar@^6.1.0, tar@^6.1.11, tar@^6.1.2: temp-dir@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/temp-dir/-/temp-dir-1.0.0.tgz#0a7c0ea26d3a39afa7e0ebea9c1fc0bc4daa011d" - integrity sha1-CnwOom06Oa+n4OvqnB/AvE2qAR0= + integrity sha512-xZFXEGbG7SNC3itwBzI3RYjq/cEhBkx2hJuKGIUOcEULmkQExXiHat2z/qkISYsuR+IKumhEfKKbV5qXmhICFQ== 
terminal-link@^2.0.0: version "2.1.1" @@ -8741,20 +8775,20 @@ terminal-link@^2.0.0: supports-hyperlinks "^2.0.0" terser-webpack-plugin@^5.1.3: - version "5.3.1" - resolved "https://registry.yarnpkg.com/terser-webpack-plugin/-/terser-webpack-plugin-5.3.1.tgz#0320dcc270ad5372c1e8993fabbd927929773e54" - integrity sha512-GvlZdT6wPQKbDNW/GDQzZFg/j4vKU96yl2q6mcUkzKOgW4gwf1Z8cZToUCrz31XHlPWH8MVb1r2tFtdDtTGJ7g== + version "5.3.3" + resolved "https://registry.yarnpkg.com/terser-webpack-plugin/-/terser-webpack-plugin-5.3.3.tgz#8033db876dd5875487213e87c627bca323e5ed90" + integrity sha512-Fx60G5HNYknNTNQnzQ1VePRuu89ZVYWfjRAeT5rITuCY/1b08s49e5kSQwHDirKZWuoKOBRFS98EUUoZ9kLEwQ== dependencies: + "@jridgewell/trace-mapping" "^0.3.7" jest-worker "^27.4.5" schema-utils "^3.1.1" serialize-javascript "^6.0.0" - source-map "^0.6.1" terser "^5.7.2" terser@^5.7.2, terser@^5.9.0: - version "5.14.0" - resolved "https://registry.yarnpkg.com/terser/-/terser-5.14.0.tgz#eefeec9af5153f55798180ee2617f390bdd285e2" - integrity sha512-JC6qfIEkPBd9j1SMO3Pfn+A6w2kQV54tv+ABQLgZr7dA3k/DL/OBoYSWxzVpZev3J+bUHXfr55L8Mox7AaNo6g== + version "5.14.2" + resolved "https://registry.yarnpkg.com/terser/-/terser-5.14.2.tgz#9ac9f22b06994d736174f4091aa368db896f1c10" + integrity sha512-oL0rGeM/WFQCUd0y2QrWxYnq7tfSuKBiqTjRPWrRgB46WD/kiwHwF8T23z78H6Q6kGCuuHcPB+KULHRdxvVGQA== dependencies: "@jridgewell/source-map" "^0.3.2" acorn "^8.5.0" @@ -8778,18 +8812,13 @@ text-extensions@^1.0.0: text-table@^0.2.0: version "0.2.0" resolved "https://registry.yarnpkg.com/text-table/-/text-table-0.2.0.tgz#7f5ee823ae805207c00af2df4a84ec3fcfa570b4" - integrity sha1-f17oI66AUgfACvLfSoTsP8+lcLQ= + integrity sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw== textextensions@^3.2.0: version "3.3.0" resolved "https://registry.yarnpkg.com/textextensions/-/textextensions-3.3.0.tgz#03530d5287b86773c08b77458589148870cc71d3" integrity sha512-mk82dS8eRABNbeVJrEiN5/UMSCliINAuz8mkUwH4SwslkNP//gbEzlWNS5au0z5Dpx40SQxzqZevZkn+WYJ9Dw== -throat@^6.0.1: - version "6.0.1" - resolved "https://registry.yarnpkg.com/throat/-/throat-6.0.1.tgz#d514fedad95740c12c2d7fc70ea863eb51ade375" - integrity sha512-8hmiGIJMDlwjg7dlJ4yKGLK8EsYqKgPWbG3b4wjJddKNwc7N7Dpn08Df4szr/sZdMVeOstrdYSsqzX6BYbcB+w== - through2-filter@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/through2-filter/-/through2-filter-3.0.0.tgz#700e786df2367c2c88cd8aa5be4cf9c1e7831254" @@ -8824,12 +8853,12 @@ through2@^4.0.0, through2@^4.0.2: through@2, "through@>=2.2.7 <3", through@^2.3.4, through@^2.3.6, through@^2.3.8: version "2.3.8" resolved "https://registry.yarnpkg.com/through/-/through-2.3.8.tgz#0dd4c9ffaabc357960b1b724115d7e0e86a2e1f5" - integrity sha1-DdTJ/6q8NXlgsbckEV1+Doai4fU= + integrity sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg== time-stamp@^1.0.0: version "1.1.0" resolved "https://registry.yarnpkg.com/time-stamp/-/time-stamp-1.1.0.tgz#764a5a11af50561921b133f3b44e618687e0f5c3" - integrity sha1-dkpaEa9QVhkhsTPztE5hhofg9cM= + integrity sha512-gLCeArryy2yNTRzTGKbZbloctj64jkZ57hj5zdraXue6aFgd6PmvVtEyiUU+hvU0v7q08oVv8r8ev0tRo6bvgw== timers-ext@^0.1.7: version "0.1.7" @@ -8846,6 +8875,13 @@ tmp@^0.0.33: dependencies: os-tmpdir "~1.0.2" +tmp@~0.2.1: + version "0.2.1" + resolved "https://registry.yarnpkg.com/tmp/-/tmp-0.2.1.tgz#8457fc3037dcf4719c251367a1af6500ee1ccf14" + integrity sha512-76SUhtfqR2Ijn+xllcI5P1oyannHNHByD80W1q447gU3mp9G9PSpGdWmjUOHRDPiHYacIk66W7ubDTuPF3BEtQ== + dependencies: + rimraf "^3.0.0" + 
tmpl@1.0.5: version "1.0.5" resolved "https://registry.yarnpkg.com/tmpl/-/tmpl-1.0.5.tgz#8683e0b902bb9c20c4f726e3c0b69f36518c07cc" @@ -8854,7 +8890,7 @@ tmpl@1.0.5: to-absolute-glob@^2.0.0: version "2.0.2" resolved "https://registry.yarnpkg.com/to-absolute-glob/-/to-absolute-glob-2.0.2.tgz#1865f43d9e74b0822db9f145b78cff7d0f7c849b" - integrity sha1-GGX0PZ50sIItufFFt4z/fQ98hJs= + integrity sha512-rtwLUQEwT8ZeKQbyFJyomBRYXyE16U5VKuy0ftxLMK/PZb2fkOsg5r9kHdauuVDbsNdIBoC/HCthpidamQFXYA== dependencies: is-absolute "^1.0.0" is-negated-glob "^1.0.0" @@ -8862,19 +8898,19 @@ to-absolute-glob@^2.0.0: to-fast-properties@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/to-fast-properties/-/to-fast-properties-2.0.0.tgz#dc5e698cbd079265bc73e0377681a4e4e83f616e" - integrity sha1-3F5pjL0HkmW8c+A3doGk5Og/YW4= + integrity sha512-/OaKK0xYrs3DmxRYqL/yDc+FxFUVYhDlXMhRmv3z915w2HF1tnN1omB354j8VUGO/hbRzyD6Y3sA7v7GS/ceog== to-object-path@^0.3.0: version "0.3.0" resolved "https://registry.yarnpkg.com/to-object-path/-/to-object-path-0.3.0.tgz#297588b7b0e7e0ac08e04e672f85c1f4999e17af" - integrity sha1-KXWIt7Dn4KwI4E5nL4XB9JmeF68= + integrity sha512-9mWHdnGRuh3onocaHzukyvCZhzvr6tiflAy/JRFXcJX0TjgfWA9pk9t8CMbzmBE4Jfw58pXbkngtBtqYxzNEyg== dependencies: kind-of "^3.0.2" to-regex-range@^2.1.0: version "2.1.1" resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-2.1.1.tgz#7c80c17b9dfebe599e27367e0d4dd5590141db38" - integrity sha1-fIDBe53+vlmeJzZ+DU3VWQFB2zg= + integrity sha512-ZZWNfCjUokXXDGXFpZehJIkZqq91BcULFq/Pi7M5i4JnxXdhMKAK682z8bCW3o8Hj1wuuzoKcW3DfVzaP6VuNg== dependencies: is-number "^3.0.0" repeat-string "^1.6.1" @@ -8899,7 +8935,7 @@ to-regex@^3.0.1, to-regex@^3.0.2: to-through@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/to-through/-/to-through-2.0.0.tgz#fc92adaba072647bc0b67d6b03664aa195093af6" - integrity sha1-/JKtq6ByZHvAtn1rA2ZKoZUJOvY= + integrity sha512-+QIz37Ly7acM4EMdw2PRN389OneM5+d844tirkGp4dPKzI5OE72V9OsbFp+CIYJDahZ41ZV05hNtcPAQUAm9/Q== dependencies: through2 "^2.0.3" @@ -8918,7 +8954,7 @@ tr46@^2.1.0: tr46@~0.0.3: version "0.0.3" resolved "https://registry.yarnpkg.com/tr46/-/tr46-0.0.3.tgz#8184fd347dac9cdc185992f3a6622e14b9d9ab6a" - integrity sha1-gYT9NH2snNwYWZLzpmIuFLnZq2o= + integrity sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw== treeverse@^2.0.0: version "2.0.0" @@ -8935,10 +8971,10 @@ trim-newlines@^4.0.2: resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-4.0.2.tgz#d6aaaf6a0df1b4b536d183879a6b939489808c7c" integrity sha512-GJtWyq9InR/2HRiLZgpIKv+ufIKrVrvjQWEj7PxAXNc5dwbNJkqhAUoAGgzRmULAnoOM5EIpveYd3J2VeSAIew== -ts-jest@28.0.3: - version "28.0.3" - resolved "https://registry.yarnpkg.com/ts-jest/-/ts-jest-28.0.3.tgz#d1c47f167e56eef3989bb51afaf7fc1c87a04c52" - integrity sha512-HzgbEDQ2KgVtDmpXToqAcKTyGHdHsG23i/iUjfxji92G5eT09S1m9UHZd7csF0Bfgh9txM4JzwHnv7r1waFPlw== +ts-jest@28.0.7: + version "28.0.7" + resolved "https://registry.yarnpkg.com/ts-jest/-/ts-jest-28.0.7.tgz#e18757a9e44693da9980a79127e5df5a98b37ac6" + integrity sha512-wWXCSmTwBVmdvWrOpYhal79bDpioDy4rTT+0vyUnE3ZzM7LOAAGG9NXwzkEL/a516rQEgnMmS/WKP9jBPCVJyA== dependencies: bs-logger "0.x" fast-json-stable-stringify "2.x" @@ -8947,12 +8983,12 @@ ts-jest@28.0.3: lodash.memoize "4.x" make-error "1.x" semver "7.x" - yargs-parser "^20.x" + yargs-parser "^21.0.1" -ts-node@10.8.0: - version "10.8.0" - resolved "https://registry.yarnpkg.com/ts-node/-/ts-node-10.8.0.tgz#3ceb5ac3e67ae8025c1950626aafbdecb55d82ce" - integrity 
sha512-/fNd5Qh+zTt8Vt1KbYZjRHCE9sI5i7nqfD/dzBBRDeVXZXS6kToW6R7tTU6Nd4XavFs0mAVCg29Q//ML7WsZYA== +ts-node@10.9.1: + version "10.9.1" + resolved "https://registry.yarnpkg.com/ts-node/-/ts-node-10.9.1.tgz#e73de9102958af9e1f0b168a6ff320e25adcff4b" + integrity sha512-NtVysVPkxxrwFGUUxGYhfux8k78pQB3JqYBXlLRZgdGUqTO5wU/UyHop5p70iEbGhB7q5KmiZiU0Y3KlJrScEw== dependencies: "@cspotcode/source-map-support" "^0.8.0" "@tsconfig/node10" "^1.0.7" @@ -8968,7 +9004,17 @@ ts-node@10.8.0: v8-compile-cache-lib "^3.0.1" yn "3.1.1" -tslib@^1.8.1, tslib@^1.9.0: +tsconfig-paths@^3.9.0: + version "3.14.1" + resolved "https://registry.yarnpkg.com/tsconfig-paths/-/tsconfig-paths-3.14.1.tgz#ba0734599e8ea36c862798e920bcf163277b137a" + integrity sha512-fxDhWnFSLt3VuTwtvJt5fpwxBHg5AdKWMsgcPOOIilyjymcYVZoCQF8fvFRezCNfblEXmi+PcM1eYHeOAgXCOQ== + dependencies: + "@types/json5" "^0.0.29" + json5 "^1.0.1" + minimist "^1.2.6" + strip-bom "^3.0.0" + +tslib@^1.8.1: version "1.14.1" resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.14.1.tgz#cf2d38bdc34a134bcaf1091c41f6619e2f672d00" integrity sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg== @@ -9038,9 +9084,9 @@ type@^1.0.1: integrity sha512-+5nt5AAniqsCnu2cEQQdpzCAh33kVx8n0VoFidKpB1dVVLAN/F+bgVOqOJqOnEnrhp222clB5p3vUlD+1QAnfg== type@^2.5.0: - version "2.6.0" - resolved "https://registry.yarnpkg.com/type/-/type-2.6.0.tgz#3ca6099af5981d36ca86b78442973694278a219f" - integrity sha512-eiDBDOmkih5pMbo9OqsqPRGMljLodLcwd5XD5JbtNB0o89xZAwynY9EdCDsJU7LtcVCClu9DvM7/0Ep1hYX3EQ== + version "2.7.2" + resolved "https://registry.yarnpkg.com/type/-/type-2.7.2.tgz#2376a15a3a28b1efa0f5350dcf72d24df6ef98d0" + integrity sha512-dzlvlNlt6AXU7EBSfpAscydQ7gXB+pPGsPnfJnZpiNJBDj7IaJzQlBZYGdEi4R9HmPdBv2XmWJ6YUtoTa7lmCw== typedarray-to-buffer@^3.1.5: version "3.1.5" @@ -9052,23 +9098,22 @@ typedarray-to-buffer@^3.1.5: typedarray@^0.0.6: version "0.0.6" resolved "https://registry.yarnpkg.com/typedarray/-/typedarray-0.0.6.tgz#867ac74e3864187b1d3d47d996a78ec5c8830777" - integrity sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c= + integrity sha512-/aCDEGatGvZ2BIk+HmLf4ifCJFwvKFNb9/JeZPMulfgFracn9QFcAf5GO8B/mweUjSoblS5In0cWhqpfs/5PQA== -typedoc@0.22.16: - version "0.22.16" - resolved "https://registry.yarnpkg.com/typedoc/-/typedoc-0.22.16.tgz#41e0ff099274ce13c5c3ea49cc5ad615d2f3119e" - integrity sha512-0Qf0/CsQe6JZTXoYwBM3Iql8gLAWLjQP7O/j9YzfkJp3G/WVGmIMRajKnldJuA/zVvhr+ifsHTgctQh5g2t4iw== +typedoc@0.23.10: + version "0.23.10" + resolved "https://registry.yarnpkg.com/typedoc/-/typedoc-0.23.10.tgz#285d595a5f2e35ccdf6f38eba4dfe951d5bff461" + integrity sha512-03EUiu/ZuScUBMnY6p0lY+HTH8SwhzvRE3gImoemdPDWXPXlks83UGTx++lyquWeB1MTwm9D9Ca8RIjkK3AFfQ== dependencies: - glob "^8.0.3" lunr "^2.3.9" - marked "^4.0.16" + marked "^4.0.18" minimatch "^5.1.0" shiki "^0.10.1" -typescript@4.7.2: - version "4.7.2" - resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.7.2.tgz#1f9aa2ceb9af87cca227813b4310fff0b51593c4" - integrity sha512-Mamb1iX2FDUpcTRzltPxgWMKy3fhg0TN378ylbktPGPK/99KbDtMQ4W1hwgsbPAsG3a0xKa1vmw4VKZQbkvz5A== +typescript@4.7.4: + version "4.7.4" + resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.7.4.tgz#1a88596d1cf47d59507a1bcdfb5b9dfe4d488235" + integrity sha512-C0WQT0gezHuw6AdY1M2jxUO83Rjf0HP7Sk1DtXj6j1EwkQNZrHAg2XPWlq62oqEhYvONq5pkC2Y9oPljWToLmQ== typical@^4.0.0: version "4.0.0" @@ -9081,19 +9126,19 @@ typical@^5.2.0: integrity sha512-dvdQgNDNJo+8B2uBQoqdb11eUCE1JQXhvjC/CZtgvZseVd5TYMXnq0+vuUemXbd/Se29cTaUuPX3YIc2xgbvIg== uglify-js@^3.1.4: - version 
"3.15.5" - resolved "https://registry.yarnpkg.com/uglify-js/-/uglify-js-3.15.5.tgz#2b10f9e0bfb3f5c15a8e8404393b6361eaeb33b3" - integrity sha512-hNM5q5GbBRB5xB+PMqVRcgYe4c8jbyZ1pzZhS6jbq54/4F2gFK869ZheiE5A8/t+W5jtTNpWef/5Q9zk639FNQ== + version "3.16.3" + resolved "https://registry.yarnpkg.com/uglify-js/-/uglify-js-3.16.3.tgz#94c7a63337ee31227a18d03b8a3041c210fd1f1d" + integrity sha512-uVbFqx9vvLhQg0iBaau9Z75AxWJ8tqM9AV890dIZCLApF4rTcyHwmAvLeEdYRs+BzYWu8Iw81F79ah0EfTXbaw== unc-path-regex@^0.1.2: version "0.1.2" resolved "https://registry.yarnpkg.com/unc-path-regex/-/unc-path-regex-0.1.2.tgz#e73dd3d7b0d7c5ed86fbac6b0ae7d8c6a69d50fa" - integrity sha1-5z3T17DXxe2G+6xrCufYxqadUPo= + integrity sha512-eXL4nmJT7oCpkZsHZUOJo8hcX3GbsiDOa0Qu9F646fi8dT3XuSVopVqAcEiVzSKKH7UoDti23wNX3qGFxcW5Qg== undertaker-registry@^1.0.0: version "1.0.1" resolved "https://registry.yarnpkg.com/undertaker-registry/-/undertaker-registry-1.0.1.tgz#5e4bda308e4a8a2ae584f9b9a4359a499825cc50" - integrity sha1-XkvaMI5KiirlhPm5pDWaSZglzFA= + integrity sha512-UR1khWeAjugW3548EfQmL9Z7pGMlBgXteQpr1IZeZBtnkCJQJIJ1Scj0mb9wQaPvUZ9Q17XqW6TIaPchJkyfqw== undertaker@^1.2.1: version "1.3.0" @@ -9156,7 +9201,7 @@ universalify@^2.0.0: unset-value@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/unset-value/-/unset-value-1.0.0.tgz#8376873f7d2335179ffb1e6fc3a8ed0dfc8ab559" - integrity sha1-g3aHP30jNRef+x5vw6jtDfyKtVk= + integrity sha512-PcA2tsuGSF9cnySLHTLSh2qrQiJ70mn+r+Glzxv2TWZblxsxCC52BDlZoPCsz7STd9pN7EZetkWZBAvk4cgZdQ== dependencies: has-value "^0.3.1" isobject "^3.0.0" @@ -9171,6 +9216,14 @@ upath@^2.0.1: resolved "https://registry.yarnpkg.com/upath/-/upath-2.0.1.tgz#50c73dea68d6f6b990f51d279ce6081665d61a8b" integrity sha512-1uEe95xksV1O0CYKXo8vQvN1JEbtJp7lb7C5U9HMsIp6IVwntkH/oNUzyVNQSd4S1sYk2FpSSW44FqMc8qee5w== +update-browserslist-db@^1.0.5: + version "1.0.5" + resolved "https://registry.yarnpkg.com/update-browserslist-db/-/update-browserslist-db-1.0.5.tgz#be06a5eedd62f107b7c19eb5bcefb194411abf38" + integrity sha512-dteFFpCyvuDdr9S/ff1ISkKt/9YZxKjI9WlRR99c180GaztJtRa/fn18FdxGVKVsnPY7/a/FDN68mcvUmP4U7Q== + dependencies: + escalade "^3.1.1" + picocolors "^1.0.0" + uri-js@^4.2.2: version "4.4.1" resolved "https://registry.yarnpkg.com/uri-js/-/uri-js-4.4.1.tgz#9b1a52595225859e55f669d928f88c6c57f2a77e" @@ -9181,7 +9234,7 @@ uri-js@^4.2.2: urix@^0.1.0: version "0.1.0" resolved "https://registry.yarnpkg.com/urix/-/urix-0.1.0.tgz#da937f7a62e21fec1fd18d49b35c2935067a6c72" - integrity sha1-2pN/emLiH+wf0Y1Js1wpNQZ6bHI= + integrity sha512-Am1ousAhSLBeB9cG/7k7r2R0zj50uDRlZHPGbazid5s9rlF1F/QKYObEKSIunSjIOkJZqwRRLpvewjEkM7pSqg== use@^3.1.0: version "3.1.1" @@ -9191,7 +9244,7 @@ use@^3.1.0: util-deprecate@^1.0.1, util-deprecate@~1.0.1: version "1.0.2" resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf" - integrity sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8= + integrity sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw== uuid@^8.3.2: version "8.3.2" @@ -9203,17 +9256,17 @@ v8-compile-cache-lib@^3.0.1: resolved "https://registry.yarnpkg.com/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz#6336e8d71965cb3d35a1bbb7868445a7c05264bf" integrity sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg== -v8-compile-cache@^2.0.3: +v8-compile-cache@2.3.0, v8-compile-cache@^2.0.3: version "2.3.0" resolved 
"https://registry.yarnpkg.com/v8-compile-cache/-/v8-compile-cache-2.3.0.tgz#2de19618c66dc247dcfb6f99338035d8245a2cee" integrity sha512-l8lCEmLcLYZh4nbunNZvQCJc5pv7+RCwa8q/LdUx8u7lsWvPDKmpodJAJNwkAhJC//dFY48KuIEmjtd4RViDrA== -v8-to-istanbul@^9.0.0: - version "9.0.0" - resolved "https://registry.yarnpkg.com/v8-to-istanbul/-/v8-to-istanbul-9.0.0.tgz#be0dae58719fc53cb97e5c7ac1d7e6d4f5b19511" - integrity sha512-HcvgY/xaRm7isYmyx+lFKA4uQmfUbN0J4M0nNItvzTvH/iQ9kW5j/t4YSR+Ge323/lrgDAWJoF46tzGQHwBHFw== +v8-to-istanbul@^9.0.1: + version "9.0.1" + resolved "https://registry.yarnpkg.com/v8-to-istanbul/-/v8-to-istanbul-9.0.1.tgz#b6f994b0b5d4ef255e17a0d17dc444a9f5132fa4" + integrity sha512-74Y4LqY74kLE6IFyIjPtkSTWzUZmj8tdHT9Ii/26dvQ6K9Dl2NbEfj0XgU2sHCtKgt5VupqhlO/5aWuqS+IY1w== dependencies: - "@jridgewell/trace-mapping" "^0.3.7" + "@jridgewell/trace-mapping" "^0.3.12" "@types/istanbul-lib-coverage" "^2.0.1" convert-source-map "^1.6.0" @@ -9235,7 +9288,7 @@ validate-npm-package-license@^3.0.1, validate-npm-package-license@^3.0.4: validate-npm-package-name@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/validate-npm-package-name/-/validate-npm-package-name-3.0.0.tgz#5fa912d81eb7d0c74afc140de7317f0ca7df437e" - integrity sha1-X6kS2B630MdK/BQN5zF/DKffQ34= + integrity sha512-M6w37eVCMMouJ9V/sdPGnC5H4uDr73/+xdq0FBLO3TFFX1+7wiUY6Es328NN+y43tmY+doUdN9g9J21vqB7iLw== dependencies: builtins "^1.0.3" @@ -9249,12 +9302,12 @@ validate-npm-package-name@^4.0.0: value-or-function@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/value-or-function/-/value-or-function-3.0.0.tgz#1c243a50b595c1be54a754bfece8563b9ff8d813" - integrity sha1-HCQ6ULWVwb5Up1S/7OhWO5/42BM= + integrity sha512-jdBB2FrWvQC/pnPtIqcLsMaQgjhdb6B7tk1MMyTKapox+tQZbdRP4uLxu/JY0t7fbfDCUMnuelzEYv5GsxHhdg== vinyl-buffer@1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/vinyl-buffer/-/vinyl-buffer-1.0.1.tgz#96c1a3479b8c5392542c612029013b5b27f88bbf" - integrity sha1-lsGjR5uMU5JULGEgKQE7Wyf4i78= + integrity sha512-LRBE2/g3C1hSHL2k/FynSZcVTRhEw8sb08oKGt/0hukZXwrh2m8nfy+r5yLhGEk7eFFuclhyIuPct/Bxlxk6rg== dependencies: bl "^1.2.1" through2 "^2.0.3" @@ -9285,14 +9338,14 @@ vinyl-fs@^3.0.0, vinyl-fs@^3.0.3: vinyl-named@1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/vinyl-named/-/vinyl-named-1.1.0.tgz#94e4fe741e38db0ec303e5b3d868b297a2deab66" - integrity sha1-lOT+dB442w7DA+Wz2Giyl6Leq2Y= + integrity sha512-ElYBnsSw8Y1Hz11WPw0DFmi+TBNTEBhZ9zXaHluDSIZZnkFIGCjGRBpsW5QmbMMLwv+lRpUD3VbKdJCbNpct7Q== dependencies: through "^2.3.6" vinyl-source-stream@2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/vinyl-source-stream/-/vinyl-source-stream-2.0.0.tgz#f38a5afb9dd1e93b65d550469ac6182ac4f54b8e" - integrity sha1-84pa+53R6Ttl1VBGmsYYKsT1S44= + integrity sha512-Y5f1wRGajOfYukhv8biIGA7iZiY8UOIc3zJ6zcUNIbRG1BVuXzBsfSfe7MUJTttVkuy64k/pGQtJdd/aIt+hbw== dependencies: through2 "^2.0.3" vinyl "^2.1.0" @@ -9300,7 +9353,7 @@ vinyl-source-stream@2.0.0: vinyl-sourcemap@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/vinyl-sourcemap/-/vinyl-sourcemap-1.1.0.tgz#92a800593a38703a8cdb11d8b300ad4be63b3e16" - integrity sha1-kqgAWTo4cDqM2xHYswCtS+Y7PhY= + integrity sha512-NiibMgt6VJGJmyw7vtzhctDcfKch4e4n9TBeoWlirb7FMg9/1Ov9k+A5ZRAtywBpRPiyECvQRQllYM8dECegVA== dependencies: append-buffer "^1.0.2" convert-source-map "^1.5.0" @@ -9313,7 +9366,7 @@ vinyl-sourcemap@^1.1.0: vinyl-sourcemaps-apply@^0.2.0, vinyl-sourcemaps-apply@^0.2.1: version "0.2.1" resolved 
"https://registry.yarnpkg.com/vinyl-sourcemaps-apply/-/vinyl-sourcemaps-apply-0.2.1.tgz#ab6549d61d172c2b1b87be5c508d239c8ef87705" - integrity sha1-q2VJ1h0XLCsbh75cUI0jnI74dwU= + integrity sha512-+oDh3KYZBoZC8hfocrbrxbLUeaYtQK7J5WU5Br9VqWqmCll3tFJqKp97GC9GmMsVIL0qnx2DgEDVxdo5EZ5sSw== dependencies: source-map "^0.5.1" @@ -9344,25 +9397,25 @@ walk-up-path@^1.0.0: resolved "https://registry.yarnpkg.com/walk-up-path/-/walk-up-path-1.0.0.tgz#d4745e893dd5fd0dbb58dd0a4c6a33d9c9fec53e" integrity sha512-hwj/qMDUEjCU5h0xr90KGCf0tg0/LgJbmOWgrWKYlcJZM7XvquvUJZ0G/HMGr7F7OQMOUuPHWP9JpriinkAlkg== -walker@^1.0.7: +walker@^1.0.8: version "1.0.8" resolved "https://registry.yarnpkg.com/walker/-/walker-1.0.8.tgz#bd498db477afe573dc04185f011d3ab8a8d7653f" integrity sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ== dependencies: makeerror "1.0.12" -watchpack@^2.3.1: - version "2.3.1" - resolved "https://registry.yarnpkg.com/watchpack/-/watchpack-2.3.1.tgz#4200d9447b401156eeca7767ee610f8809bc9d25" - integrity sha512-x0t0JuydIo8qCNctdDrn1OzH/qDzk2+rdCOC3YzumZ42fiMqmQ7T3xQurykYMhYfHaPHTp4ZxAx2NfUo1K6QaA== +watchpack@^2.4.0: + version "2.4.0" + resolved "https://registry.yarnpkg.com/watchpack/-/watchpack-2.4.0.tgz#fa33032374962c78113f93c7f2fb4c54c9862a5d" + integrity sha512-Lcvm7MGST/4fup+ifyKi2hjyIAwcdI4HRgtvTpIUxBRhB+RFtUh8XtDOxUfctVCnhVi+QQj49i91OyvzkJl6cg== dependencies: glob-to-regexp "^0.4.1" graceful-fs "^4.1.2" -wcwidth@^1.0.0: +wcwidth@^1.0.0, wcwidth@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/wcwidth/-/wcwidth-1.0.1.tgz#f0b0dcf915bc5ff1528afadb2c0e17b532da2fe8" - integrity sha1-8LDc+RW8X/FSivrbLA4XtTLaL+g= + integrity sha512-XHPEwS0q6TaxcvG85+8EYkbiCux2XtWG2mkc47Ng2A77BQu9+DqIOJldST4HgPkuea7dvKSj5VgX3P1d4rW8Tg== dependencies: defaults "^1.0.3" @@ -9379,7 +9432,7 @@ web-streams-polyfill@~3.0.3: webidl-conversions@^3.0.0: version "3.0.1" resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-3.0.1.tgz#24534275e2a7bc6be7bc86611cc16ae0a5654871" - integrity sha1-JFNCdeKnvGvnvIZhHMFq4KVlSHE= + integrity sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ== webidl-conversions@^6.1.0: version "6.1.0" @@ -9420,21 +9473,21 @@ webpack-stream@7.0.0: through "^2.3.8" vinyl "^2.2.1" -webpack@5.72.1: - version "5.72.1" - resolved "https://registry.yarnpkg.com/webpack/-/webpack-5.72.1.tgz#3500fc834b4e9ba573b9f430b2c0a61e1bb57d13" - integrity sha512-dXG5zXCLspQR4krZVR6QgajnZOjW2K/djHvdcRaDQvsjV9z9vaW6+ja5dZOYbqBBjF6kGXka/2ZyxNdc+8Jung== +webpack@5.74.0: + version "5.74.0" + resolved "https://registry.yarnpkg.com/webpack/-/webpack-5.74.0.tgz#02a5dac19a17e0bb47093f2be67c695102a55980" + integrity sha512-A2InDwnhhGN4LYctJj6M1JEaGL7Luj6LOmyBHjcI8529cm5p6VXiTIW2sn6ffvEAKmveLzvu4jrihwXtPojlAA== dependencies: "@types/eslint-scope" "^3.7.3" "@types/estree" "^0.0.51" "@webassemblyjs/ast" "1.11.1" "@webassemblyjs/wasm-edit" "1.11.1" "@webassemblyjs/wasm-parser" "1.11.1" - acorn "^8.4.1" + acorn "^8.7.1" acorn-import-assertions "^1.7.6" browserslist "^4.14.5" chrome-trace-event "^1.0.2" - enhanced-resolve "^5.9.3" + enhanced-resolve "^5.10.0" es-module-lexer "^0.9.0" eslint-scope "5.1.1" events "^3.2.0" @@ -9447,13 +9500,13 @@ webpack@5.72.1: schema-utils "^3.1.0" tapable "^2.1.1" terser-webpack-plugin "^5.1.3" - watchpack "^2.3.1" + watchpack "^2.4.0" webpack-sources "^3.2.3" whatwg-url@^5.0.0: version "5.0.0" resolved 
"https://registry.yarnpkg.com/whatwg-url/-/whatwg-url-5.0.0.tgz#966454e8765462e37644d3626f6742ce8b70965d" - integrity sha1-lmRU6HZUYuN2RNNib2dCzotwll0= + integrity sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw== dependencies: tr46 "~0.0.3" webidl-conversions "^3.0.0" @@ -9470,7 +9523,7 @@ whatwg-url@^8.4.0: which-module@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/which-module/-/which-module-1.0.0.tgz#bba63ca861948994ff307736089e3b96026c2a4f" - integrity sha1-u6Y8qGGUiZT/MHc2CJ47lgJsKk8= + integrity sha512-F6+WgncZi/mJDrammbTuHe1q0R5hOXv/mBaiNA2TCNT/LTHusX0V+CJnj9XT8ki5ln2UZyyddDgHfCzyrOH7MQ== which@^1.2.14: version "1.3.1" @@ -9486,7 +9539,7 @@ which@^2.0.1, which@^2.0.2: dependencies: isexe "^2.0.0" -wide-align@^1.1.0, wide-align@^1.1.5: +wide-align@^1.1.5: version "1.1.5" resolved "https://registry.yarnpkg.com/wide-align/-/wide-align-1.1.5.tgz#df1d4c206854369ecf3c9a4898f1b23fbd9d15d3" integrity sha512-eDMORYaPNZ4sQIuuYPDHdQvf4gyCF9rEEV/yPxGfwPkRodwEgiMUUXTx/dex+Me0wxx53S+NgUHaP7y3MGlDmg== @@ -9501,7 +9554,7 @@ word-wrap@^1.2.3: wordwrap@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/wordwrap/-/wordwrap-1.0.0.tgz#27584810891456a4171c8d0226441ade90cbcaeb" - integrity sha1-J1hIEIkUVqQXHI0CJkQa3pDLyus= + integrity sha512-gvVzJFlPycKc5dZN4yPkP8w7Dc37BtP1yczEneOb4uq34pXZcvrtRTmWV8W+Ume+XCxKgbjM+nevkyFPMybd4Q== wordwrapjs@^4.0.0: version "4.0.1" @@ -9514,7 +9567,7 @@ wordwrapjs@^4.0.0: wrap-ansi@^2.0.0: version "2.1.0" resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-2.1.0.tgz#d8fc3d284dd05794fe84973caecdd1cf824fdd85" - integrity sha1-2Pw9KE3QV5T+hJc8rs3Rz4JP3YU= + integrity sha512-vAaEaDM946gbNpH5pLVNR+vX2ht6n0Bt3GXwVB1AuAqZosOvHNF3P7wDnh8KLkSqgUh0uh77le7Owgoz+Z9XBw== dependencies: string-width "^1.0.1" strip-ansi "^3.0.1" @@ -9540,7 +9593,7 @@ wrap-ansi@^7.0.0: wrappy@1: version "1.0.2" resolved "https://registry.yarnpkg.com/wrappy/-/wrappy-1.0.2.tgz#b5243d8f3ec1aa35f1364605bc0d1036e30ab69f" - integrity sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8= + integrity sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ== write-file-atomic@^2.4.2: version "2.4.3" @@ -9551,7 +9604,7 @@ write-file-atomic@^2.4.2: imurmurhash "^0.1.4" signal-exit "^3.0.2" -write-file-atomic@^3.0.0, write-file-atomic@^3.0.3: +write-file-atomic@^3.0.0: version "3.0.3" resolved "https://registry.yarnpkg.com/write-file-atomic/-/write-file-atomic-3.0.3.tgz#56bd5c5a5c70481cd19c571bd39ab965a5de56e8" integrity sha512-AvHcyZ5JnSfq3ioSyjrBkH9yW4m7Ayk8/9My/DD9onKeu/94fwrMocemO2QAJFAlnnDN+ZDS+ZjAR5ua1/PV/Q== @@ -9603,9 +9656,9 @@ write-pkg@^4.0.0: write-json-file "^3.2.0" ws@^7.3.1: - version "7.5.8" - resolved "https://registry.yarnpkg.com/ws/-/ws-7.5.8.tgz#ac2729881ab9e7cbaf8787fe3469a48c5c7f636a" - integrity sha512-ri1Id1WinAX5Jqn9HejiGb8crfRio0Qgu8+MtL36rlTA6RLsMdWt1Az/19A2Qij6uSHUMphEFaTKa4WG+UNHNw== + version "7.5.9" + resolved "https://registry.yarnpkg.com/ws/-/ws-7.5.9.tgz#54fa7db29f4c7cec68b1ddd3a89de099942bb591" + integrity sha512-F+P9Jil7UiSKSkppIiD94dN07AwvFixvLIj1Og1Rl9GGMuNipJnV9JzjD6XuqmAeiswGvUmNLjr5cFuXwNS77Q== xml2js@0.4.23: version "0.4.23" @@ -9650,12 +9703,17 @@ yargs-parser@20.2.4: resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.2.4.tgz#b42890f14566796f85ae8e3a25290d205f154a54" integrity sha512-WOkpgNhPTlE73h4VFAFsOnomJVaovO8VqLDzy5saChRBFQFBoMYirowyW+Q9HB4HFF4Z7VZTiG3iSzJJA29yRA== -yargs-parser@>=5.0.0-security.0, yargs-parser@^21.0.0: +yargs-parser@21.0.1: version 
"21.0.1" resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-21.0.1.tgz#0267f286c877a4f0f728fceb6f8a3e4cb95c6e35" integrity sha512-9BK1jFpLzJROCI5TzwZL/TU4gqjK5xiHV/RfWLOahrjAko/e4DJkRDZQXfvqAsiZzzYhgAzbgz6lg48jcm4GLg== -yargs-parser@^20.2.2, yargs-parser@^20.2.3, yargs-parser@^20.2.9, yargs-parser@^20.x: +yargs-parser@>=5.0.0-security.0, yargs-parser@^21.0.0, yargs-parser@^21.0.1: + version "21.1.1" + resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-21.1.1.tgz#9096bceebf990d21bb31fa9516e0ede294a77d35" + integrity sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw== + +yargs-parser@^20.2.2, yargs-parser@^20.2.3, yargs-parser@^20.2.9: version "20.2.9" resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.2.9.tgz#2eb7dc3b0289718fc295f362753845c41a0c94ee" integrity sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w== @@ -9681,7 +9739,7 @@ yargs@^16.2.0: y18n "^5.0.5" yargs-parser "^20.2.2" -yargs@^17.3.1: +yargs@^17.3.1, yargs@^17.4.0: version "17.5.1" resolved "https://registry.yarnpkg.com/yargs/-/yargs-17.5.1.tgz#e109900cab6fcb7fd44b1d8249166feb0b36e58e" integrity sha512-t6YAJcxDkNX7NFYiVtKvWUz8l+PaKTLiL63mJYWR2GnHq2gjEWISzsLp9wg3aY36dY1j+gfIEL3pIF+XlJJfbA== diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt index 29163f47995f2..79bc809a21fce 100644 --- a/matlab/CMakeLists.txt +++ b/matlab/CMakeLists.txt @@ -189,7 +189,7 @@ endmacro() set(CMAKE_CXX_STANDARD 11) -set(MLARROW_VERSION "9.0.0-SNAPSHOT") +set(MLARROW_VERSION "10.0.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" MLARROW_BASE_VERSION "${MLARROW_VERSION}") project(mlarrow VERSION "${MLARROW_BASE_VERSION}") diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index a657f56bb2df9..66087fb97955f 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -604,29 +604,27 @@ foreach(module ${CYTHON_EXTENSIONS}) ${module_output_directory}) endif() - if(PYARROW_BUNDLE_ARROW_CPP) - # In the event that we are bundling the shared libraries (e.g. in a - # manylinux1 wheel), we need to set the RPATH of the extensions to the - # root of the pyarrow/ package so that libarrow/libarrow_python are able - # to be loaded properly - if(APPLE) - set(module_install_rpath "@loader_path/") - else() - set(module_install_rpath "\$ORIGIN") - endif() + # In the event that we are bundling the shared libraries (e.g. in a + # manylinux1 wheel), we need to set the RPATH of the extensions to the + # root of the pyarrow/ package so that libarrow is able to be + # loaded properly + if(APPLE) + set(module_install_rpath "@loader_path/") + else() + set(module_install_rpath "\$ORIGIN") + endif() - # XXX(wesm): ARROW-2326 this logic is only needed when we have Cython - # modules in interior directories. Since all of our C extensions and - # bundled libraries are in the same place, we can skip this part + # XXX(wesm): ARROW-2326 this logic is only needed when we have Cython + # modules in interior directories. 
Since all of our C extensions and + # bundled libraries are in the same place, we can skip this part - # list(LENGTH directories i) - # while(${i} GREATER 0) - # set(module_install_rpath "${module_install_rpath}/..") - # math(EXPR i "${i} - 1" ) - # endwhile(${i} GREATER 0) + # list(LENGTH directories i) + # while(${i} GREATER 0) + # set(module_install_rpath "${module_install_rpath}/..") + # math(EXPR i "${i} - 1" ) + # endwhile(${i} GREATER 0) - set_target_properties(${module_name} PROPERTIES INSTALL_RPATH ${module_install_rpath}) - endif() + set_target_properties(${module_name} PROPERTIES INSTALL_RPATH ${module_install_rpath}) if(PYARROW_GENERATE_COVERAGE) set_target_properties(${module_name} PROPERTIES COMPILE_DEFINITIONS diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index a030a53e37c95..19d630bc0abf4 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -51,7 +51,7 @@ def parse_git(root, **kwargs): """ from setuptools_scm.git import parse kwargs['describe_command'] = \ - "git describe --dirty --tags --long --match 'apache-arrow-[0-9].*'" + "git describe --dirty --tags --long --match 'apache-arrow-[0-9]*.*'" return parse(root, **kwargs) __version__ = setuptools_scm.get_version('../', parse=parse_git) diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index 66ed7db99735f..c17e855aa53d6 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -1159,6 +1159,7 @@ cdef class ParquetReader(_Weakrefable): CMemoryPool* pool unique_ptr[FileReader] reader FileMetaData _metadata + shared_ptr[CRandomAccessFile] rd_handle cdef public: _column_idx_map @@ -1175,7 +1176,6 @@ cdef class ParquetReader(_Weakrefable): thrift_string_size_limit=None, thrift_container_size_limit=None): cdef: - shared_ptr[CRandomAccessFile] rd_handle shared_ptr[CFileMetaData] c_metadata CReaderProperties properties = default_reader_properties() ArrowReaderProperties arrow_props = ( @@ -1221,10 +1221,10 @@ cdef class ParquetReader(_Weakrefable): string_to_timeunit(coerce_int96_timestamp_unit)) self.source = source + get_reader(source, use_memory_map, &self.rd_handle) - get_reader(source, use_memory_map, &rd_handle) with nogil: - check_status(builder.Open(rd_handle, properties, c_metadata)) + check_status(builder.Open(self.rd_handle, properties, c_metadata)) # Set up metadata with nogil: @@ -1435,6 +1435,19 @@ cdef class ParquetReader(_Weakrefable): .ReadColumn(column_index, &out)) return pyarrow_wrap_chunked_array(out) + def close(self): + if not self.closed: + with nogil: + check_status(self.rd_handle.get().Close()) + + @property + def closed(self): + if self.rd_handle == NULL: + return True + with nogil: + closed = self.rd_handle.get().closed() + return closed + cdef shared_ptr[WriterProperties] _create_writer_properties( use_dictionary=None, diff --git a/python/pyarrow/_s3fs.pyx b/python/pyarrow/_s3fs.pyx index f668038e62381..955a7a5514bad 100644 --- a/python/pyarrow/_s3fs.pyx +++ b/python/pyarrow/_s3fs.pyx @@ -88,6 +88,44 @@ def resolve_s3_region(bucket): return frombytes(c_region) +class S3RetryStrategy: + """ + Base class for AWS retry strategies for use with S3. + + Parameters + ---------- + max_attempts : int, default 3 + The maximum number of retry attempts to attempt before failing. + """ + + def __init__(self, max_attempts=3): + self.max_attempts = max_attempts + + +class AwsStandardS3RetryStrategy(S3RetryStrategy): + """ + Represents an AWS Standard retry strategy for use with S3. 
+ + Parameters + ---------- + max_attempts : int, default 3 + The maximum number of retry attempts to attempt before failing. + """ + pass + + +class AwsDefaultS3RetryStrategy(S3RetryStrategy): + """ + Represents an AWS Default retry strategy for use with S3. + + Parameters + ---------- + max_attempts : int, default 3 + The maximum number of retry attempts to attempt before failing. + """ + pass + + cdef class S3FileSystem(FileSystem): """ S3-backed FileSystem implementation @@ -137,6 +175,13 @@ cdef class S3FileSystem(FileSystem): assumed role session will be refreshed. region : str, default 'us-east-1' AWS region to connect to. + request_timeout : double, default None + Socket read timeouts on Windows and macOS, in seconds. + If omitted, the AWS SDK default value is used (typically 3 seconds). + This option is ignored on non-Windows, non-macOS systems. + connect_timeout : double, default None + Socket connection timeout, in seconds. + If omitted, the AWS SDK default value is used (typically 1 second). scheme : str, default 'https' S3 connection transport scheme. endpoint_override : str, default None @@ -166,6 +211,9 @@ cdef class S3FileSystem(FileSystem): allow_bucket_deletion : bool, default False Whether to allow DeleteDir at the bucket-level. This option may also be passed in a URI query parameter. + retry_strategy : S3RetryStrategy, default AwsStandardS3RetryStrategy(max_attempts=3) + The retry strategy to use with S3; fail after max_attempts. Available + strategies are AwsStandardS3RetryStrategy, AwsDefaultS3RetryStrategy. Examples -------- @@ -183,11 +231,13 @@ cdef class S3FileSystem(FileSystem): CS3FileSystem* s3fs def __init__(self, *, access_key=None, secret_key=None, session_token=None, - bint anonymous=False, region=None, scheme=None, - endpoint_override=None, bint background_writes=True, - default_metadata=None, role_arn=None, session_name=None, - external_id=None, load_frequency=900, proxy_options=None, - allow_bucket_creation=False, allow_bucket_deletion=False): + bint anonymous=False, region=None, request_timeout=None, + connect_timeout=None, scheme=None, endpoint_override=None, + bint background_writes=True, default_metadata=None, + role_arn=None, session_name=None, external_id=None, + load_frequency=900, proxy_options=None, + allow_bucket_creation=False, allow_bucket_deletion=False, + retry_strategy: S3RetryStrategy = AwsStandardS3RetryStrategy(max_attempts=3)): cdef: CS3Options options shared_ptr[CS3FileSystem] wrapped @@ -254,6 +304,10 @@ cdef class S3FileSystem(FileSystem): if region is not None: options.region = tobytes(region) + if request_timeout is not None: + options.request_timeout = request_timeout + if connect_timeout is not None: + options.connect_timeout = connect_timeout if scheme is not None: options.scheme = tobytes(scheme) if endpoint_override is not None: @@ -288,6 +342,15 @@ cdef class S3FileSystem(FileSystem): options.allow_bucket_creation = allow_bucket_creation options.allow_bucket_deletion = allow_bucket_deletion + if isinstance(retry_strategy, AwsStandardS3RetryStrategy): + options.retry_strategy = CS3RetryStrategy.GetAwsStandardRetryStrategy( + retry_strategy.max_attempts) + elif isinstance(retry_strategy, AwsDefaultS3RetryStrategy): + options.retry_strategy = CS3RetryStrategy.GetAwsDefaultRetryStrategy( + retry_strategy.max_attempts) + else: + raise ValueError(f'Invalid retry_strategy {retry_strategy!r}') + with nogil: wrapped = GetResultValue(CS3FileSystem.Make(options)) @@ -324,6 +387,8 @@ cdef class S3FileSystem(FileSystem): 
CS3CredentialsKind_Anonymous), region=frombytes(opts.region), scheme=frombytes(opts.scheme), + connect_timeout=opts.connect_timeout, + request_timeout=opts.request_timeout, endpoint_override=frombytes(opts.endpoint_override), role_arn=frombytes(opts.role_arn), session_name=frombytes(opts.session_name), diff --git a/python/pyarrow/_substrait.pyx b/python/pyarrow/_substrait.pyx index 7f079fb717b79..05794a95a20ee 100644 --- a/python/pyarrow/_substrait.pyx +++ b/python/pyarrow/_substrait.pyx @@ -19,6 +19,7 @@ from cython.operator cimport dereference as deref from pyarrow import Buffer +from pyarrow.lib import frombytes from pyarrow.lib cimport * from pyarrow.includes.libarrow cimport * from pyarrow.includes.libarrow_substrait cimport * @@ -77,3 +78,27 @@ def _parse_json_plan(plan): with nogil: c_buf_plan = GetResultValue(c_res_buffer) return pyarrow_wrap_buffer(c_buf_plan) + + +def get_supported_functions(): + """ + Get a list of Substrait functions that the underlying + engine currently supports. + + Returns + ------- + list[str] + A list of function ids encoded as '{uri}#{name}' + """ + + cdef: + ExtensionIdRegistry* c_id_registry + std_vector[c_string] c_ids + + c_id_registry = default_extension_id_registry() + c_ids = c_id_registry.GetSupportedSubstraitFunctions() + + functions_list = [] + for c_id in c_ids: + functions_list.append(frombytes(c_id)) + return functions_list diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py index 2518e37ec6f3d..326b37ec6e1ae 100644 --- a/python/pyarrow/dataset.py +++ b/python/pyarrow/dataset.py @@ -964,7 +964,7 @@ def file_visitor(written_file): # was converted to one of those two. So we can grab the schema # to build the partitioning object from Dataset. if isinstance(data, Scanner): - partitioning_schema = data.dataset_schema + partitioning_schema = data.projected_schema else: partitioning_schema = data.schema partitioning = _ensure_write_partitioning(partitioning, diff --git a/python/pyarrow/fs.py b/python/pyarrow/fs.py index b2db818a9abcb..c6f44ccbb5997 100644 --- a/python/pyarrow/fs.py +++ b/python/pyarrow/fs.py @@ -52,8 +52,9 @@ try: from pyarrow._s3fs import ( # noqa - S3FileSystem, S3LogLevel, initialize_s3, finalize_s3, - resolve_s3_region) + AwsDefaultS3RetryStrategy, AwsStandardS3RetryStrategy, + S3FileSystem, S3LogLevel, S3RetryStrategy, finalize_s3, + initialize_s3, resolve_s3_region) except ImportError: _not_imported.append("S3FileSystem") else: diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 4cbcef84e8837..781d2ce7ad6e8 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -760,6 +760,10 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: CChunkedArray(const vector[shared_ptr[CArray]]& arrays) CChunkedArray(const vector[shared_ptr[CArray]]& arrays, const shared_ptr[CDataType]& type) + + @staticmethod + CResult[shared_ptr[CChunkedArray]] Make(vector[shared_ptr[CArray]] chunks, + shared_ptr[CDataType] type) int64_t length() int64_t null_count() int num_chunks() @@ -1280,6 +1284,12 @@ cdef extern from "arrow/io/api.h" namespace "arrow::io" nogil: Seekable): CResult[int64_t] GetSize() + @staticmethod + CResult[shared_ptr[CInputStream]] GetStream( + shared_ptr[CRandomAccessFile] file, + int64_t file_offset, + int64_t nbytes) + CResult[int64_t] ReadAt(int64_t position, int64_t nbytes, uint8_t* buffer) CResult[shared_ptr[CBuffer]] ReadAt(int64_t position, int64_t nbytes) diff --git a/python/pyarrow/includes/libarrow_fs.pxd 
b/python/pyarrow/includes/libarrow_fs.pxd index d47b462aa5c94..7984b54f587b8 100644 --- a/python/pyarrow/includes/libarrow_fs.pxd +++ b/python/pyarrow/includes/libarrow_fs.pxd @@ -150,8 +150,17 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil: CS3CredentialsKind_WebIdentity \ "arrow::fs::S3CredentialsKind::WebIdentity" + cdef cppclass CS3RetryStrategy "arrow::fs::S3RetryStrategy": + @staticmethod + shared_ptr[CS3RetryStrategy] GetAwsDefaultRetryStrategy(int64_t max_attempts) + + @staticmethod + shared_ptr[CS3RetryStrategy] GetAwsStandardRetryStrategy(int64_t max_attempts) + cdef cppclass CS3Options "arrow::fs::S3Options": c_string region + double connect_timeout + double request_timeout c_string endpoint_override c_string scheme c_bool background_writes @@ -164,6 +173,7 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil: int load_frequency CS3ProxyOptions proxy_options CS3CredentialsKind credentials_kind + shared_ptr[CS3RetryStrategy] retry_strategy void ConfigureDefaultCredentials() void ConfigureAccessKey(const c_string& access_key, const c_string& secret_key, diff --git a/python/pyarrow/includes/libarrow_substrait.pxd b/python/pyarrow/includes/libarrow_substrait.pxd index 2e1a17b06bddd..0b3ace75d92b0 100644 --- a/python/pyarrow/includes/libarrow_substrait.pxd +++ b/python/pyarrow/includes/libarrow_substrait.pxd @@ -17,10 +17,20 @@ # distutils: language = c++ +from libcpp.vector cimport vector as std_vector + from pyarrow.includes.common cimport * from pyarrow.includes.libarrow cimport * -cdef extern from "arrow/engine/substrait/util.h" namespace "arrow::engine::substrait" nogil: +cdef extern from "arrow/engine/substrait/util.h" namespace "arrow::engine" nogil: CResult[shared_ptr[CRecordBatchReader]] ExecuteSerializedPlan(const CBuffer& substrait_buffer) CResult[shared_ptr[CBuffer]] SerializeJsonPlan(const c_string& substrait_json) + +cdef extern from "arrow/engine/substrait/extension_set.h" \ + namespace "arrow::engine" nogil: + + cdef cppclass ExtensionIdRegistry: + std_vector[c_string] GetSupportedSubstraitFunctions() + + ExtensionIdRegistry* default_extension_id_registry() diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index d2e4f7062e68a..3dd60735c3cc8 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -121,6 +121,15 @@ cdef class NativeFile(_Weakrefable): def __exit__(self, exc_type, exc_value, tb): self.close() + def __repr__(self): + name = f"pyarrow.{self.__class__.__name__}" + return (f"<{name} " + f"closed={self.closed} " + f"own_file={self.own_file} " + f"is_seekable={self.is_seekable} " + f"is_writable={self.is_writable} " + f"is_readable={self.is_readable}>") + @property def mode(self): """ @@ -399,6 +408,42 @@ cdef class NativeFile(_Weakrefable): return PyObject_to_object(obj) + def get_stream(self, file_offset, nbytes): + """ + Return an input stream that reads a file segment independent of the + state of the file. + + Allows reading portions of a random access file as an input stream + without interfering with each other. 
+ + Parameters + ---------- + file_offset : int + nbytes : int + + Returns + ------- + stream : NativeFile + """ + cdef: + shared_ptr[CInputStream] data + int64_t c_file_offset + int64_t c_nbytes + + c_file_offset = file_offset + c_nbytes = nbytes + + handle = self.get_random_access_file() + + data = GetResultValue( + CRandomAccessFile.GetStream(handle, c_file_offset, c_nbytes)) + + stream = NativeFile() + stream.set_input_stream(data) + stream.is_readable = True + + return stream + def read_at(self, nbytes, offset): """ Read indicated number of bytes at offset from the file @@ -730,6 +775,13 @@ cdef class PythonFile(NativeFile): As a downside, there is a non-zero redirection cost in translating Arrow stream calls to Python method calls. Furthermore, Python's Global Interpreter Lock may limit parallelism in some situations. + + Examples + -------- + >>> import io + >>> import pyarrow as pa + >>> pa.PythonFile(io.BytesIO()) + """ cdef: object handle @@ -885,7 +937,7 @@ def memory_map(path, mode='r'): ---------- path : str mode : {'r', 'r+', 'w'}, default 'r' - Whether the file is opened for reading ('r+'), writing ('w') + Whether the file is opened for reading ('r'), writing ('w') or both ('r+'). Returns @@ -1017,6 +1069,14 @@ cdef class Buffer(_Weakrefable): def __len__(self): return self.size + def __repr__(self): + name = f"pyarrow.{self.__class__.__name__}" + return (f"<{name} " + f"address={hex(self.address)} " + f"size={self.size} " + f"is_cpu={self.is_cpu} " + f"is_mutable={self.is_mutable}>") + @property def size(self): """ @@ -1807,6 +1867,17 @@ cdef class Codec(_Weakrefable): ------ ValueError If invalid compression value is passed. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.Codec.is_available('gzip') + True + >>> codec = pa.Codec('gzip') + >>> codec.name + 'gzip' + >>> codec.compression_level + 9 """ def __init__(self, str compression not None, compression_level=None): @@ -1928,7 +1999,9 @@ cdef class Codec(_Weakrefable): @property def compression_level(self): """Returns the compression level parameter of the codec""" - return frombytes(self.unwrap().compression_level()) + if self.name == 'snappy': + return None + return self.unwrap().compression_level() def compress(self, object buf, asbytes=False, memory_pool=None): """ @@ -2044,6 +2117,12 @@ cdef class Codec(_Weakrefable): return pybuf if asbytes else out_buf + def __repr__(self): + name = f"pyarrow.{self.__class__.__name__}" + return (f"<{name} " + f"name={self.name} " + f"compression_level={self.compression_level}>") + def compress(object buf, codec='lz4', asbytes=False, memory_pool=None): """ diff --git a/python/pyarrow/memory.pxi b/python/pyarrow/memory.pxi index 2258be78d5479..1ddcb01ccb6ab 100644 --- a/python/pyarrow/memory.pxi +++ b/python/pyarrow/memory.pxi @@ -76,6 +76,12 @@ cdef class MemoryPool(_Weakrefable): """ return frombytes(self.pool.backend_name()) + def __repr__(self): + name = f"pyarrow.{self.__class__.__name__}" + return (f"<{name} " + f"backend_name={self.backend_name} " + f"bytes_allocated={self.bytes_allocated()} " + f"max_memory={self.max_memory()}>") cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool): if memory_pool is None: @@ -118,6 +124,11 @@ cdef class ProxyMemoryPool(MemoryPool): def default_memory_pool(): """ Return the process-global memory pool. 
+ + Examples + -------- + >>> default_memory_pool() + """ cdef: MemoryPool pool = MemoryPool.__new__(MemoryPool) diff --git a/python/pyarrow/parquet/__init__.py b/python/pyarrow/parquet/__init__.py index 868a83f0ebc1a..134f3c097ef00 100644 --- a/python/pyarrow/parquet/__init__.py +++ b/python/pyarrow/parquet/__init__.py @@ -15,3444 +15,6 @@ # specific language governing permissions and limitations # under the License. +# flake8: noqa -from collections import defaultdict -from concurrent import futures -from functools import partial, reduce - -import json -from collections.abc import Collection -import numpy as np -import os -import re -import operator -import urllib.parse -import warnings - -import pyarrow as pa -import pyarrow.lib as lib -import pyarrow._parquet as _parquet - -from pyarrow._parquet import (ParquetReader, Statistics, # noqa - FileMetaData, RowGroupMetaData, - ColumnChunkMetaData, - ParquetSchema, ColumnSchema, - ParquetLogicalType, - FileEncryptionProperties, - FileDecryptionProperties) -from pyarrow.fs import (LocalFileSystem, FileSystem, - _resolve_filesystem_and_path, _ensure_filesystem) -from pyarrow import filesystem as legacyfs -from pyarrow.util import guid, _is_path_like, _stringify_path - -_URI_STRIP_SCHEMES = ('hdfs',) - - -def _parse_uri(path): - path = _stringify_path(path) - parsed_uri = urllib.parse.urlparse(path) - if parsed_uri.scheme in _URI_STRIP_SCHEMES: - return parsed_uri.path - else: - # ARROW-4073: On Windows returning the path with the scheme - # stripped removes the drive letter, if any - return path - - -def _get_filesystem_and_path(passed_filesystem, path): - if passed_filesystem is None: - return legacyfs.resolve_filesystem_and_path(path, passed_filesystem) - else: - passed_filesystem = legacyfs._ensure_filesystem(passed_filesystem) - parsed_path = _parse_uri(path) - return passed_filesystem, parsed_path - - -def _check_contains_null(val): - if isinstance(val, bytes): - for byte in val: - if isinstance(byte, bytes): - compare_to = chr(0) - else: - compare_to = 0 - if byte == compare_to: - return True - elif isinstance(val, str): - return '\x00' in val - return False - - -def _check_filters(filters, check_null_strings=True): - """ - Check if filters are well-formed. - """ - if filters is not None: - if len(filters) == 0 or any(len(f) == 0 for f in filters): - raise ValueError("Malformed filters") - if isinstance(filters[0][0], str): - # We have encountered the situation where we have one nesting level - # too few: - # We have [(,,), ..] instead of [[(,,), ..]] - filters = [filters] - if check_null_strings: - for conjunction in filters: - for col, op, val in conjunction: - if ( - isinstance(val, list) and - all(_check_contains_null(v) for v in val) or - _check_contains_null(val) - ): - raise NotImplementedError( - "Null-terminated binary strings are not supported " - "as filter values." - ) - return filters - - -_DNF_filter_doc = """Predicates are expressed in disjunctive normal form (DNF), like - ``[[('x', '=', 0), ...], ...]``. DNF allows arbitrary boolean logical - combinations of single column predicates. The innermost tuples each - describe a single column predicate. The list of inner predicates is - interpreted as a conjunction (AND), forming a more selective and - multiple column predicate. Finally, the most outer list combines these - filters as a disjunction (OR). - - Predicates may also be passed as List[Tuple]. This form is interpreted - as a single conjunction. 
To express OR in predicates, one must - use the (preferred) List[List[Tuple]] notation. - - Each tuple has format: (``key``, ``op``, ``value``) and compares the - ``key`` with the ``value``. - The supported ``op`` are: ``=`` or ``==``, ``!=``, ``<``, ``>``, ``<=``, - ``>=``, ``in`` and ``not in``. If the ``op`` is ``in`` or ``not in``, the - ``value`` must be a collection such as a ``list``, a ``set`` or a - ``tuple``. - - Examples: - - .. code-block:: python - - ('x', '=', 0) - ('y', 'in', ['a', 'b', 'c']) - ('z', 'not in', {'a','b'}) - - """ - - -def _filters_to_expression(filters): - """ - Check if filters are well-formed. - - See _DNF_filter_doc above for more details. - """ - import pyarrow.dataset as ds - - if isinstance(filters, ds.Expression): - return filters - - filters = _check_filters(filters, check_null_strings=False) - - def convert_single_predicate(col, op, val): - field = ds.field(col) - - if op == "=" or op == "==": - return field == val - elif op == "!=": - return field != val - elif op == '<': - return field < val - elif op == '>': - return field > val - elif op == '<=': - return field <= val - elif op == '>=': - return field >= val - elif op == 'in': - return field.isin(val) - elif op == 'not in': - return ~field.isin(val) - else: - raise ValueError( - '"{0}" is not a valid operator in predicates.'.format( - (col, op, val))) - - disjunction_members = [] - - for conjunction in filters: - conjunction_members = [ - convert_single_predicate(col, op, val) - for col, op, val in conjunction - ] - - disjunction_members.append(reduce(operator.and_, conjunction_members)) - - return reduce(operator.or_, disjunction_members) - - -# ---------------------------------------------------------------------- -# Reading a single Parquet file - - -class ParquetFile: - """ - Reader interface for a single Parquet file. - - Parameters - ---------- - source : str, pathlib.Path, pyarrow.NativeFile, or file-like object - Readable source. For passing bytes or buffer-like file containing a - Parquet file, use pyarrow.BufferReader. - metadata : FileMetaData, default None - Use existing metadata object, rather than reading from file. - common_metadata : FileMetaData, default None - Will be used in reads for pandas schema metadata if not found in the - main file's metadata, no other uses at the moment. - memory_map : bool, default False - If the source is a file path, use a memory map to read file, which can - improve performance in some environments. - buffer_size : int, default 0 - If positive, perform read buffering when deserializing individual - column chunks. Otherwise IO calls are unbuffered. - pre_buffer : bool, default False - Coalesce and issue file reads in parallel to improve performance on - high-latency filesystems (e.g. S3). If True, Arrow will use a - background I/O thread pool. - read_dictionary : list - List of column names to read directly as DictionaryArray. - coerce_int96_timestamp_unit : str, default None. - Cast timestamps that are stored in INT96 format to a particular - resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' - and therefore INT96 timestamps will be inferred as timestamps - in nanoseconds. - decryption_properties : FileDecryptionProperties, default None - File decryption properties for Parquet Modular Encryption. - thrift_string_size_limit : int, default None - If not None, override the maximum total string size allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. 
- thrift_container_size_limit : int, default None - If not None, override the maximum total size of containers allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - - Examples - -------- - - Generate an example PyArrow Table and write it to Parquet file: - - >>> import pyarrow as pa - >>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, 'example.parquet') - - Create a ``ParquetFile`` object from the Parquet file: - - >>> parquet_file = pq.ParquetFile('example.parquet') - - Read the data: - - >>> parquet_file.read() - pyarrow.Table - n_legs: int64 - animal: string - ---- - n_legs: [[2,2,4,4,5,100]] - animal: [["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]] - - Create a ParquetFile object with "animal" column as DictionaryArray: - - >>> parquet_file = pq.ParquetFile('example.parquet', - ... read_dictionary=["animal"]) - >>> parquet_file.read() - pyarrow.Table - n_legs: int64 - animal: dictionary - ---- - n_legs: [[2,2,4,4,5,100]] - animal: [ -- dictionary: - ["Flamingo","Parrot",...,"Brittle stars","Centipede"] -- indices: - [0,1,2,3,4,5]] - """ - - def __init__(self, source, *, metadata=None, common_metadata=None, - read_dictionary=None, memory_map=False, buffer_size=0, - pre_buffer=False, coerce_int96_timestamp_unit=None, - decryption_properties=None, thrift_string_size_limit=None, - thrift_container_size_limit=None): - self.reader = ParquetReader() - self.reader.open( - source, use_memory_map=memory_map, - buffer_size=buffer_size, pre_buffer=pre_buffer, - read_dictionary=read_dictionary, metadata=metadata, - coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, - decryption_properties=decryption_properties, - thrift_string_size_limit=thrift_string_size_limit, - thrift_container_size_limit=thrift_container_size_limit, - ) - self.common_metadata = common_metadata - self._nested_paths_by_prefix = self._build_nested_paths() - - def _build_nested_paths(self): - paths = self.reader.column_paths - - result = defaultdict(list) - - for i, path in enumerate(paths): - key = path[0] - rest = path[1:] - while True: - result[key].append(i) - - if not rest: - break - - key = '.'.join((key, rest[0])) - rest = rest[1:] - - return result - - @property - def metadata(self): - """ - Return the Parquet metadata. - """ - return self.reader.metadata - - @property - def schema(self): - """ - Return the Parquet schema, unconverted to Arrow types - """ - return self.metadata.schema - - @property - def schema_arrow(self): - """ - Return the inferred Arrow schema, converted from the whole Parquet - file's schema - - Examples - -------- - Generate an example Parquet file: - - >>> import pyarrow as pa - >>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, 'example.parquet') - >>> parquet_file = pq.ParquetFile('example.parquet') - - Read the Arrow schema: - - >>> parquet_file.schema_arrow - n_legs: int64 - animal: string - """ - return self.reader.schema_arrow - - @property - def num_row_groups(self): - """ - Return the number of row groups of the Parquet file. - - Examples - -------- - >>> import pyarrow as pa - >>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... 
"Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, 'example.parquet') - >>> parquet_file = pq.ParquetFile('example.parquet') - - >>> parquet_file.num_row_groups - 1 - """ - return self.reader.num_row_groups - - def read_row_group(self, i, columns=None, use_threads=True, - use_pandas_metadata=False): - """ - Read a single row group from a Parquet file. - - Parameters - ---------- - i : int - Index of the individual row group that we want to read. - columns : list - If not None, only these columns will be read from the row group. A - column name may be a prefix of a nested field, e.g. 'a' will select - 'a.b', 'a.c', and 'a.d.e'. - use_threads : bool, default True - Perform multi-threaded column reads. - use_pandas_metadata : bool, default False - If True and file has custom pandas schema metadata, ensure that - index columns are also loaded. - - Returns - ------- - pyarrow.table.Table - Content of the row group as a table (of columns) - - Examples - -------- - >>> import pyarrow as pa - >>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, 'example.parquet') - >>> parquet_file = pq.ParquetFile('example.parquet') - - >>> parquet_file.read_row_group(0) - pyarrow.Table - n_legs: int64 - animal: string - ---- - n_legs: [[2,2,4,4,5,100]] - animal: [["Flamingo","Parrot",...,"Brittle stars","Centipede"]] - """ - column_indices = self._get_column_indices( - columns, use_pandas_metadata=use_pandas_metadata) - return self.reader.read_row_group(i, column_indices=column_indices, - use_threads=use_threads) - - def read_row_groups(self, row_groups, columns=None, use_threads=True, - use_pandas_metadata=False): - """ - Read a multiple row groups from a Parquet file. - - Parameters - ---------- - row_groups : list - Only these row groups will be read from the file. - columns : list - If not None, only these columns will be read from the row group. A - column name may be a prefix of a nested field, e.g. 'a' will select - 'a.b', 'a.c', and 'a.d.e'. - use_threads : bool, default True - Perform multi-threaded column reads. - use_pandas_metadata : bool, default False - If True and file has custom pandas schema metadata, ensure that - index columns are also loaded. - - Returns - ------- - pyarrow.table.Table - Content of the row groups as a table (of columns). - - Examples - -------- - >>> import pyarrow as pa - >>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, 'example.parquet') - >>> parquet_file = pq.ParquetFile('example.parquet') - - >>> parquet_file.read_row_groups([0,0]) - pyarrow.Table - n_legs: int64 - animal: string - ---- - n_legs: [[2,2,4,4,5,...,2,4,4,5,100]] - animal: [["Flamingo","Parrot","Dog",...,"Brittle stars","Centipede"]] - """ - column_indices = self._get_column_indices( - columns, use_pandas_metadata=use_pandas_metadata) - return self.reader.read_row_groups(row_groups, - column_indices=column_indices, - use_threads=use_threads) - - def iter_batches(self, batch_size=65536, row_groups=None, columns=None, - use_threads=True, use_pandas_metadata=False): - """ - Read streaming batches from a Parquet file. - - Parameters - ---------- - batch_size : int, default 64K - Maximum number of records to yield per batch. 
Batches may be - smaller if there aren't enough rows in the file. - row_groups : list - Only these row groups will be read from the file. - columns : list - If not None, only these columns will be read from the file. A - column name may be a prefix of a nested field, e.g. 'a' will select - 'a.b', 'a.c', and 'a.d.e'. - use_threads : boolean, default True - Perform multi-threaded column reads. - use_pandas_metadata : boolean, default False - If True and file has custom pandas schema metadata, ensure that - index columns are also loaded. - - Returns - ------- - iterator of pyarrow.RecordBatch - Contents of each batch as a record batch - - Examples - -------- - Generate an example Parquet file: - - >>> import pyarrow as pa - >>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, 'example.parquet') - >>> parquet_file = pq.ParquetFile('example.parquet') - >>> for i in parquet_file.iter_batches(): - ... print("RecordBatch") - ... print(i.to_pandas()) - ... - RecordBatch - n_legs animal - 0 2 Flamingo - 1 2 Parrot - 2 4 Dog - 3 4 Horse - 4 5 Brittle stars - 5 100 Centipede - """ - if row_groups is None: - row_groups = range(0, self.metadata.num_row_groups) - column_indices = self._get_column_indices( - columns, use_pandas_metadata=use_pandas_metadata) - - batches = self.reader.iter_batches(batch_size, - row_groups=row_groups, - column_indices=column_indices, - use_threads=use_threads) - return batches - - def read(self, columns=None, use_threads=True, use_pandas_metadata=False): - """ - Read a Table from Parquet format. - - Parameters - ---------- - columns : list - If not None, only these columns will be read from the file. A - column name may be a prefix of a nested field, e.g. 'a' will select - 'a.b', 'a.c', and 'a.d.e'. - use_threads : bool, default True - Perform multi-threaded column reads. - use_pandas_metadata : bool, default False - If True and file has custom pandas schema metadata, ensure that - index columns are also loaded. - - Returns - ------- - pyarrow.table.Table - Content of the file as a table (of columns). - - Examples - -------- - Generate an example Parquet file: - - >>> import pyarrow as pa - >>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, 'example.parquet') - >>> parquet_file = pq.ParquetFile('example.parquet') - - Read a Table: - - >>> parquet_file.read(columns=["animal"]) - pyarrow.Table - animal: string - ---- - animal: [["Flamingo","Parrot",...,"Brittle stars","Centipede"]] - """ - column_indices = self._get_column_indices( - columns, use_pandas_metadata=use_pandas_metadata) - return self.reader.read_all(column_indices=column_indices, - use_threads=use_threads) - - def scan_contents(self, columns=None, batch_size=65536): - """ - Read contents of file for the given columns and batch size. - - Notes - ----- - This function's primary purpose is benchmarking. - The scan is executed on a single thread. - - Parameters - ---------- - columns : list of integers, default None - Select columns to read, if None scan all columns. - batch_size : int, default 64K - Number of rows to read at a time internally. 
- - Returns - ------- - num_rows : number of rows in file - - Examples - -------- - >>> import pyarrow as pa - >>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, 'example.parquet') - >>> parquet_file = pq.ParquetFile('example.parquet') - - >>> parquet_file.scan_contents() - 6 - """ - column_indices = self._get_column_indices(columns) - return self.reader.scan_contents(column_indices, - batch_size=batch_size) - - def _get_column_indices(self, column_names, use_pandas_metadata=False): - if column_names is None: - return None - - indices = [] - - for name in column_names: - if name in self._nested_paths_by_prefix: - indices.extend(self._nested_paths_by_prefix[name]) - - if use_pandas_metadata: - file_keyvalues = self.metadata.metadata - common_keyvalues = (self.common_metadata.metadata - if self.common_metadata is not None - else None) - - if file_keyvalues and b'pandas' in file_keyvalues: - index_columns = _get_pandas_index_columns(file_keyvalues) - elif common_keyvalues and b'pandas' in common_keyvalues: - index_columns = _get_pandas_index_columns(common_keyvalues) - else: - index_columns = [] - - if indices is not None and index_columns: - indices += [self.reader.column_name_idx(descr) - for descr in index_columns - if not isinstance(descr, dict)] - - return indices - - -_SPARK_DISALLOWED_CHARS = re.compile('[ ,;{}()\n\t=]') - - -def _sanitized_spark_field_name(name): - return _SPARK_DISALLOWED_CHARS.sub('_', name) - - -def _sanitize_schema(schema, flavor): - if 'spark' in flavor: - sanitized_fields = [] - - schema_changed = False - - for field in schema: - name = field.name - sanitized_name = _sanitized_spark_field_name(name) - - if sanitized_name != name: - schema_changed = True - sanitized_field = pa.field(sanitized_name, field.type, - field.nullable, field.metadata) - sanitized_fields.append(sanitized_field) - else: - sanitized_fields.append(field) - - new_schema = pa.schema(sanitized_fields, metadata=schema.metadata) - return new_schema, schema_changed - else: - return schema, False - - -def _sanitize_table(table, new_schema, flavor): - # TODO: This will not handle prohibited characters in nested field names - if 'spark' in flavor: - column_data = [table[i] for i in range(table.num_columns)] - return pa.Table.from_arrays(column_data, schema=new_schema) - else: - return table - - -_parquet_writer_arg_docs = """version : {"1.0", "2.4", "2.6"}, default "2.4" - Determine which Parquet logical types are available for use, whether the - reduced set from the Parquet 1.x.x format or the expanded logical types - added in later format versions. - Files written with version='2.4' or '2.6' may not be readable in all - Parquet implementations, so version='1.0' is likely the choice that - maximizes file compatibility. - UINT32 and some logical types are only available with version '2.4'. - Nanosecond timestamps are only available with version '2.6'. - Other features such as compression algorithms or the new serialized - data page format must be enabled separately (see 'compression' and - 'data_page_version'). -use_dictionary : bool or list - Specify if we should use dictionary encoding in general or only for - some columns. -use_deprecated_int96_timestamps : bool, default None - Write timestamps to INT96 Parquet format. Defaults to False unless enabled - by flavor argument. This take priority over the coerce_timestamps option. 
-coerce_timestamps : str, default None - Cast timestamps to a particular resolution. If omitted, defaults are chosen - depending on `version`. By default, for ``version='1.0'`` (the default) - and ``version='2.4'``, nanoseconds are cast to microseconds ('us'), while - for other `version` values, they are written natively without loss - of resolution. Seconds are always cast to milliseconds ('ms') by default, - as Parquet does not have any temporal type with seconds resolution. - If the casting results in loss of data, it will raise an exception - unless ``allow_truncated_timestamps=True`` is given. - Valid values: {None, 'ms', 'us'} -data_page_size : int, default None - Set a target threshold for the approximate encoded size of data - pages within a column chunk (in bytes). If None, use the default data page - size of 1MByte. -allow_truncated_timestamps : bool, default False - Allow loss of data when coercing timestamps to a particular - resolution. E.g. if microsecond or nanosecond data is lost when coercing to - 'ms', do not raise an exception. Passing ``allow_truncated_timestamp=True`` - will NOT result in the truncation exception being ignored unless - ``coerce_timestamps`` is not None. -compression : str or dict - Specify the compression codec, either on a general basis or per-column. - Valid values: {'NONE', 'SNAPPY', 'GZIP', 'BROTLI', 'LZ4', 'ZSTD'}. -write_statistics : bool or list - Specify if we should write statistics in general (default is True) or only - for some columns. -flavor : {'spark'}, default None - Sanitize schema or set other compatibility options to work with - various target systems. -filesystem : FileSystem, default None - If nothing passed, will be inferred from `where` if path-like, else - `where` is already a file-like object so no filesystem is needed. -compression_level : int or dict, default None - Specify the compression level for a codec, either on a general basis or - per-column. If None is passed, arrow selects the compression level for - the compression codec in use. The compression level has a different - meaning for each codec, so you have to read the documentation of the - codec you are using. - An exception is thrown if the compression codec does not allow specifying - a compression level. -use_byte_stream_split : bool or list, default False - Specify if the byte_stream_split encoding should be used in general or - only for some columns. If both dictionary and byte_stream_stream are - enabled, then dictionary is preferred. - The byte_stream_split encoding is valid only for floating-point data types - and should be combined with a compression codec. -column_encoding : string or dict, default None - Specify the encoding scheme on a per column basis. - Currently supported values: {'PLAIN', 'BYTE_STREAM_SPLIT'}. - Certain encodings are only compatible with certain data types. - Please refer to the encodings section of `Reading and writing Parquet - files `_. -data_page_version : {"1.0", "2.0"}, default "1.0" - The serialized Parquet data page format version to write, defaults to - 1.0. This does not impact the file schema logical types and Arrow to - Parquet type casting behavior; for that use the "version" option. -use_compliant_nested_type : bool, default False - Whether to write compliant Parquet nested type (lists) as defined - `here `_, defaults to ``False``. 
- For ``use_compliant_nested_type=True``, this will write into a list - with 3-level structure where the middle level, named ``list``, - is a repeated group with a single field named ``element``:: - - group (LIST) { - repeated group list { - element; - } - } - - For ``use_compliant_nested_type=False``, this will also write into a list - with 3-level structure, where the name of the single field of the middle - level ``list`` is taken from the element name for nested columns in Arrow, - which defaults to ``item``:: - - group (LIST) { - repeated group list { - item; - } - } -encryption_properties : FileEncryptionProperties, default None - File encryption properties for Parquet Modular Encryption. - If None, no encryption will be done. - The encryption properties can be created using: - ``CryptoFactory.file_encryption_properties()``. -write_batch_size : int, default None - Number of values to write to a page at a time. If None, use the default of - 1024. ``write_batch_size`` is complementary to ``data_page_size``. If pages - are exceeding the ``data_page_size`` due to large column values, lowering - the batch size can help keep page sizes closer to the intended size. -dictionary_pagesize_limit : int, default None - Specify the dictionary page size limit per row group. If None, use the - default 1MB. -""" - -_parquet_writer_example_doc = """\ -Generate an example PyArrow Table and RecordBatch: - ->>> import pyarrow as pa ->>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], -... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", -... "Brittle stars", "Centipede"]}) ->>> batch = pa.record_batch([[2, 2, 4, 4, 5, 100], -... ["Flamingo", "Parrot", "Dog", "Horse", -... "Brittle stars", "Centipede"]], -... names=['n_legs', 'animal']) - -create a ParquetWriter object: - ->>> import pyarrow.parquet as pq ->>> writer = pq.ParquetWriter('example.parquet', table.schema) - -and write the Table into the Parquet file: - ->>> writer.write_table(table) ->>> writer.close() - ->>> pq.read_table('example.parquet').to_pandas() - n_legs animal -0 2 Flamingo -1 2 Parrot -2 4 Dog -3 4 Horse -4 5 Brittle stars -5 100 Centipede - -create a ParquetWriter object for the RecordBatch: - ->>> writer2 = pq.ParquetWriter('example2.parquet', batch.schema) - -and write the RecordBatch into the Parquet file: - ->>> writer2.write_batch(batch) ->>> writer2.close() - ->>> pq.read_table('example2.parquet').to_pandas() - n_legs animal -0 2 Flamingo -1 2 Parrot -2 4 Dog -3 4 Horse -4 5 Brittle stars -5 100 Centipede -""" - - -class ParquetWriter: - - __doc__ = """ -Class for incrementally building a Parquet file for Arrow tables. - -Parameters ----------- -where : path or file-like object -schema : pyarrow.Schema -{} -writer_engine_version : unused -**options : dict - If options contains a key `metadata_collector` then the - corresponding value is assumed to be a list (or any object with - `.append` method) that will be filled with the file metadata instance - of the written file. 
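# A minimal sketch, assuming a writable 'collector_example.parquet' path, of the
# 'metadata_collector' option described above: the list passed in receives the
# FileMetaData of the written file once the writer is closed.
import pyarrow as pa
import pyarrow.parquet as pq

collected = []
table = pa.table({'x': [1, 2, 3]})
with pq.ParquetWriter('collector_example.parquet', table.schema,
                      metadata_collector=collected) as writer:
    writer.write_table(table)
print(collected[0].num_rows)  # 3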
- -Examples --------- -{} -""".format(_parquet_writer_arg_docs, _parquet_writer_example_doc) - - def __init__(self, where, schema, filesystem=None, - flavor=None, - version='2.4', - use_dictionary=True, - compression='snappy', - write_statistics=True, - use_deprecated_int96_timestamps=None, - compression_level=None, - use_byte_stream_split=False, - column_encoding=None, - writer_engine_version=None, - data_page_version='1.0', - use_compliant_nested_type=False, - encryption_properties=None, - write_batch_size=None, - dictionary_pagesize_limit=None, - **options): - if use_deprecated_int96_timestamps is None: - # Use int96 timestamps for Spark - if flavor is not None and 'spark' in flavor: - use_deprecated_int96_timestamps = True - else: - use_deprecated_int96_timestamps = False - - self.flavor = flavor - if flavor is not None: - schema, self.schema_changed = _sanitize_schema(schema, flavor) - else: - self.schema_changed = False - - self.schema = schema - self.where = where - - # If we open a file using a filesystem, store file handle so we can be - # sure to close it when `self.close` is called. - self.file_handle = None - - filesystem, path = _resolve_filesystem_and_path( - where, filesystem, allow_legacy_filesystem=True - ) - if filesystem is not None: - if isinstance(filesystem, legacyfs.FileSystem): - # legacy filesystem (eg custom subclass) - # TODO deprecate - sink = self.file_handle = filesystem.open(path, 'wb') - else: - # ARROW-10480: do not auto-detect compression. While - # a filename like foo.parquet.gz is nonconforming, it - # shouldn't implicitly apply compression. - sink = self.file_handle = filesystem.open_output_stream( - path, compression=None) - else: - sink = where - self._metadata_collector = options.pop('metadata_collector', None) - engine_version = 'V2' - self.writer = _parquet.ParquetWriter( - sink, schema, - version=version, - compression=compression, - use_dictionary=use_dictionary, - write_statistics=write_statistics, - use_deprecated_int96_timestamps=use_deprecated_int96_timestamps, - compression_level=compression_level, - use_byte_stream_split=use_byte_stream_split, - column_encoding=column_encoding, - writer_engine_version=engine_version, - data_page_version=data_page_version, - use_compliant_nested_type=use_compliant_nested_type, - encryption_properties=encryption_properties, - write_batch_size=write_batch_size, - dictionary_pagesize_limit=dictionary_pagesize_limit, - **options) - self.is_open = True - - def __del__(self): - if getattr(self, 'is_open', False): - self.close() - - def __enter__(self): - return self - - def __exit__(self, *args, **kwargs): - self.close() - # return false since we want to propagate exceptions - return False - - def write(self, table_or_batch, row_group_size=None): - """ - Write RecordBatch or Table to the Parquet file. - - Parameters - ---------- - table_or_batch : {RecordBatch, Table} - row_group_size : int, default None - Maximum size of each written row group. If None, the - row group size will be the minimum of the input - table or batch length and 64 * 1024 * 1024. - """ - if isinstance(table_or_batch, pa.RecordBatch): - self.write_batch(table_or_batch, row_group_size) - elif isinstance(table_or_batch, pa.Table): - self.write_table(table_or_batch, row_group_size) - else: - raise TypeError(type(table_or_batch)) - - def write_batch(self, batch, row_group_size=None): - """ - Write RecordBatch to the Parquet file. 
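# Illustrative sketch of the row_group_size behaviour documented above (the
# file name is an assumption): a 10-row table written with row_group_size=4
# is split into row groups of 4, 4 and 2 rows.
import pyarrow as pa
import pyarrow.parquet as pq

table = pa.table({'x': list(range(10))})
with pq.ParquetWriter('row_groups_example.parquet', table.schema) as writer:
    writer.write_table(table, row_group_size=4)
print(pq.ParquetFile('row_groups_example.parquet').num_row_groups)  # 3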
- - Parameters - ---------- - batch : RecordBatch - row_group_size : int, default None - Maximum size of each written row group. If None, the - row group size will be the minimum of the RecordBatch - size and 64 * 1024 * 1024. - """ - table = pa.Table.from_batches([batch], batch.schema) - self.write_table(table, row_group_size) - - def write_table(self, table, row_group_size=None): - """ - Write Table to the Parquet file. - - Parameters - ---------- - table : Table - row_group_size : int, default None - Maximum size of each written row group. If None, the - row group size will be the minimum of the Table size - and 64 * 1024 * 1024. - - """ - if self.schema_changed: - table = _sanitize_table(table, self.schema, self.flavor) - assert self.is_open - - if not table.schema.equals(self.schema, check_metadata=False): - msg = ('Table schema does not match schema used to create file: ' - '\ntable:\n{!s} vs. \nfile:\n{!s}' - .format(table.schema, self.schema)) - raise ValueError(msg) - - self.writer.write_table(table, row_group_size=row_group_size) - - def close(self): - """ - Close the connection to the Parquet file. - """ - if self.is_open: - self.writer.close() - self.is_open = False - if self._metadata_collector is not None: - self._metadata_collector.append(self.writer.metadata) - if self.file_handle is not None: - self.file_handle.close() - - -def _get_pandas_index_columns(keyvalues): - return (json.loads(keyvalues[b'pandas'].decode('utf8')) - ['index_columns']) - - -# ---------------------------------------------------------------------- -# Metadata container providing instructions about reading a single Parquet -# file, possibly part of a partitioned dataset - - -class ParquetDatasetPiece: - """ - DEPRECATED: A single chunk of a potentially larger Parquet dataset to read. - - The arguments will indicate to read either a single row group or all row - groups, and whether to add partition keys to the resulting pyarrow.Table. - - .. deprecated:: 5.0 - Directly constructing a ``ParquetDatasetPiece`` is deprecated, as well - as accessing the pieces of a ``ParquetDataset`` object. Specify - ``use_legacy_dataset=False`` when constructing the ``ParquetDataset`` - and use the ``ParquetDataset.fragments`` attribute instead. - - Parameters - ---------- - path : str or pathlib.Path - Path to file in the file system where this piece is located. - open_file_func : callable - Function to use for obtaining file handle to dataset piece. - partition_keys : list of tuples - Two-element tuples of ``(column name, ordinal index)``. - row_group : int, default None - Row group to load. By default, reads all row groups. 
- file_options : dict - Options - """ - - def __init__(self, path, open_file_func=partial(open, mode='rb'), - file_options=None, row_group=None, partition_keys=None): - warnings.warn( - "ParquetDatasetPiece is deprecated as of pyarrow 5.0.0 and will " - "be removed in a future version.", - FutureWarning, stacklevel=2) - self._init( - path, open_file_func, file_options, row_group, partition_keys) - - @staticmethod - def _create(path, open_file_func=partial(open, mode='rb'), - file_options=None, row_group=None, partition_keys=None): - self = ParquetDatasetPiece.__new__(ParquetDatasetPiece) - self._init( - path, open_file_func, file_options, row_group, partition_keys) - return self - - def _init(self, path, open_file_func, file_options, row_group, - partition_keys): - self.path = _stringify_path(path) - self.open_file_func = open_file_func - self.row_group = row_group - self.partition_keys = partition_keys or [] - self.file_options = file_options or {} - - def __eq__(self, other): - if not isinstance(other, ParquetDatasetPiece): - return False - return (self.path == other.path and - self.row_group == other.row_group and - self.partition_keys == other.partition_keys) - - def __repr__(self): - return ('{}({!r}, row_group={!r}, partition_keys={!r})' - .format(type(self).__name__, self.path, - self.row_group, - self.partition_keys)) - - def __str__(self): - result = '' - - if len(self.partition_keys) > 0: - partition_str = ', '.join('{}={}'.format(name, index) - for name, index in self.partition_keys) - result += 'partition[{}] '.format(partition_str) - - result += self.path - - if self.row_group is not None: - result += ' | row_group={}'.format(self.row_group) - - return result - - def get_metadata(self): - """ - Return the file's metadata. - - Returns - ------- - metadata : FileMetaData - """ - f = self.open() - return f.metadata - - def open(self): - """ - Return instance of ParquetFile. - """ - reader = self.open_file_func(self.path) - if not isinstance(reader, ParquetFile): - reader = ParquetFile(reader, **self.file_options) - return reader - - def read(self, columns=None, use_threads=True, partitions=None, - file=None, use_pandas_metadata=False): - """ - Read this piece as a pyarrow.Table. - - Parameters - ---------- - columns : list of column names, default None - use_threads : bool, default True - Perform multi-threaded column reads. - partitions : ParquetPartitions, default None - file : file-like object - Passed to ParquetFile. - use_pandas_metadata : bool - If pandas metadata should be used or not. - - Returns - ------- - table : pyarrow.Table - """ - if self.open_file_func is not None: - reader = self.open() - elif file is not None: - reader = ParquetFile(file, **self.file_options) - else: - # try to read the local path - reader = ParquetFile(self.path, **self.file_options) - - options = dict(columns=columns, - use_threads=use_threads, - use_pandas_metadata=use_pandas_metadata) - - if self.row_group is not None: - table = reader.read_row_group(self.row_group, **options) - else: - table = reader.read(**options) - - if len(self.partition_keys) > 0: - if partitions is None: - raise ValueError('Must pass partition sets') - - # Here, the index is the categorical code of the partition where - # this piece is located. Suppose we had - # - # /foo=a/0.parq - # /foo=b/0.parq - # /foo=c/0.parq - # - # Then we assign a=0, b=1, c=2. And the resulting Table pieces will - # have a DictionaryArray column named foo having the constant index - # value as indicated. 
The distinct categories of the partition have - # been computed in the ParquetManifest - for i, (name, index) in enumerate(self.partition_keys): - # The partition code is the same for all values in this piece - indices = np.full(len(table), index, dtype='i4') - - # This is set of all partition values, computed as part of the - # manifest, so ['a', 'b', 'c'] as in our example above. - dictionary = partitions.levels[i].dictionary - - arr = pa.DictionaryArray.from_arrays(indices, dictionary) - table = table.append_column(name, arr) - - return table - - -class PartitionSet: - """ - A data structure for cataloguing the observed Parquet partitions at a - particular level. So if we have - - /foo=a/bar=0 - /foo=a/bar=1 - /foo=a/bar=2 - /foo=b/bar=0 - /foo=b/bar=1 - /foo=b/bar=2 - - Then we have two partition sets, one for foo, another for bar. As we visit - levels of the partition hierarchy, a PartitionSet tracks the distinct - values and assigns categorical codes to use when reading the pieces - - Parameters - ---------- - name : str - Name of the partition set. Under which key to collect all values. - keys : list - All possible values that have been collected for that partition set. - """ - - def __init__(self, name, keys=None): - self.name = name - self.keys = keys or [] - self.key_indices = {k: i for i, k in enumerate(self.keys)} - self._dictionary = None - - def get_index(self, key): - """ - Get the index of the partition value if it is known, otherwise assign - one - - Parameters - ---------- - key : The value for which we want to known the index. - """ - if key in self.key_indices: - return self.key_indices[key] - else: - index = len(self.key_indices) - self.keys.append(key) - self.key_indices[key] = index - return index - - @property - def dictionary(self): - if self._dictionary is not None: - return self._dictionary - - if len(self.keys) == 0: - raise ValueError('No known partition keys') - - # Only integer and string partition types are supported right now - try: - integer_keys = [int(x) for x in self.keys] - dictionary = lib.array(integer_keys) - except ValueError: - dictionary = lib.array(self.keys) - - self._dictionary = dictionary - return dictionary - - @property - def is_sorted(self): - return list(self.keys) == sorted(self.keys) - - -class ParquetPartitions: - - def __init__(self): - self.levels = [] - self.partition_names = set() - - def __len__(self): - return len(self.levels) - - def __getitem__(self, i): - return self.levels[i] - - def equals(self, other): - if not isinstance(other, ParquetPartitions): - raise TypeError('`other` must be an instance of ParquetPartitions') - - return (self.levels == other.levels and - self.partition_names == other.partition_names) - - def __eq__(self, other): - try: - return self.equals(other) - except TypeError: - return NotImplemented - - def get_index(self, level, name, key): - """ - Record a partition value at a particular level, returning the distinct - code for that value at that level. 
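# Rough standalone illustration (not the library's internal code path) of how a
# constant partition value becomes a dictionary-encoded column, as in the
# ParquetDatasetPiece.read / PartitionSet logic above.
import numpy as np
import pyarrow as pa

indices = np.full(3, 1, dtype='i4')     # every row belongs to partition 'b'
dictionary = pa.array(['a', 'b', 'c'])  # all observed partition values
arr = pa.DictionaryArray.from_arrays(indices, dictionary)
print(arr.to_pylist())                  # ['b', 'b', 'b']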
- - Examples - -------- - - partitions.get_index(1, 'foo', 'a') returns 0 - partitions.get_index(1, 'foo', 'b') returns 1 - partitions.get_index(1, 'foo', 'c') returns 2 - partitions.get_index(1, 'foo', 'a') returns 0 - - Parameters - ---------- - level : int - The nesting level of the partition we are observing - name : str - The partition name - key : str or int - The partition value - """ - if level == len(self.levels): - if name in self.partition_names: - raise ValueError('{} was the name of the partition in ' - 'another level'.format(name)) - - part_set = PartitionSet(name) - self.levels.append(part_set) - self.partition_names.add(name) - - return self.levels[level].get_index(key) - - def filter_accepts_partition(self, part_key, filter, level): - p_column, p_value_index = part_key - f_column, op, f_value = filter - if p_column != f_column: - return True - - f_type = type(f_value) - - if op in {'in', 'not in'}: - if not isinstance(f_value, Collection): - raise TypeError( - "'%s' object is not a collection", f_type.__name__) - if not f_value: - raise ValueError("Cannot use empty collection as filter value") - if len({type(item) for item in f_value}) != 1: - raise ValueError("All elements of the collection '%s' must be" - " of same type", f_value) - f_type = type(next(iter(f_value))) - - elif not isinstance(f_value, str) and isinstance(f_value, Collection): - raise ValueError( - "Op '%s' not supported with a collection value", op) - - p_value = f_type(self.levels[level] - .dictionary[p_value_index].as_py()) - - if op == "=" or op == "==": - return p_value == f_value - elif op == "!=": - return p_value != f_value - elif op == '<': - return p_value < f_value - elif op == '>': - return p_value > f_value - elif op == '<=': - return p_value <= f_value - elif op == '>=': - return p_value >= f_value - elif op == 'in': - return p_value in f_value - elif op == 'not in': - return p_value not in f_value - else: - raise ValueError("'%s' is not a valid operator in predicates.", - filter[1]) - - -class ParquetManifest: - - def __init__(self, dirpath, open_file_func=None, filesystem=None, - pathsep='/', partition_scheme='hive', metadata_nthreads=1): - filesystem, dirpath = _get_filesystem_and_path(filesystem, dirpath) - self.filesystem = filesystem - self.open_file_func = open_file_func - self.pathsep = pathsep - self.dirpath = _stringify_path(dirpath) - self.partition_scheme = partition_scheme - self.partitions = ParquetPartitions() - self.pieces = [] - self._metadata_nthreads = metadata_nthreads - self._thread_pool = futures.ThreadPoolExecutor( - max_workers=metadata_nthreads) - - self.common_metadata_path = None - self.metadata_path = None - - self._visit_level(0, self.dirpath, []) - - # Due to concurrency, pieces will potentially by out of order if the - # dataset is partitioned so we sort them to yield stable results - self.pieces.sort(key=lambda piece: piece.path) - - if self.common_metadata_path is None: - # _common_metadata is a subset of _metadata - self.common_metadata_path = self.metadata_path - - self._thread_pool.shutdown() - - def _visit_level(self, level, base_path, part_keys): - fs = self.filesystem - - _, directories, files = next(fs.walk(base_path)) - - filtered_files = [] - for path in files: - full_path = self.pathsep.join((base_path, path)) - if path.endswith('_common_metadata'): - self.common_metadata_path = full_path - elif path.endswith('_metadata'): - self.metadata_path = full_path - elif self._should_silently_exclude(path): - continue - else: - filtered_files.append(full_path) 
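# Hedged usage sketch of the DNF-style filters evaluated by
# filter_accepts_partition above; it assumes the 'dataset_name' directory
# written in the ParquetDataset example earlier in this module exists.
import pyarrow.parquet as pq

# (year == 2021 AND n_legs >= 4) OR (animal in ['Parrot', 'Flamingo'])
filters = [
    [('year', '=', 2021), ('n_legs', '>=', 4)],
    [('animal', 'in', ['Parrot', 'Flamingo'])],
]
dataset = pq.ParquetDataset('dataset_name/', use_legacy_dataset=False,
                            filters=filters)
print(dataset.read().to_pandas())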
- - # ARROW-1079: Filter out "private" directories starting with underscore - filtered_directories = [self.pathsep.join((base_path, x)) - for x in directories - if not _is_private_directory(x)] - - filtered_files.sort() - filtered_directories.sort() - - if len(filtered_files) > 0 and len(filtered_directories) > 0: - raise ValueError('Found files in an intermediate ' - 'directory: {}'.format(base_path)) - elif len(filtered_directories) > 0: - self._visit_directories(level, filtered_directories, part_keys) - else: - self._push_pieces(filtered_files, part_keys) - - def _should_silently_exclude(self, file_name): - return (file_name.endswith('.crc') or # Checksums - file_name.endswith('_$folder$') or # HDFS directories in S3 - file_name.startswith('.') or # Hidden files starting with . - file_name.startswith('_') or # Hidden files starting with _ - file_name in EXCLUDED_PARQUET_PATHS) - - def _visit_directories(self, level, directories, part_keys): - futures_list = [] - for path in directories: - head, tail = _path_split(path, self.pathsep) - name, key = _parse_hive_partition(tail) - - index = self.partitions.get_index(level, name, key) - dir_part_keys = part_keys + [(name, index)] - # If you have less threads than levels, the wait call will block - # indefinitely due to multiple waits within a thread. - if level < self._metadata_nthreads: - future = self._thread_pool.submit(self._visit_level, - level + 1, - path, - dir_part_keys) - futures_list.append(future) - else: - self._visit_level(level + 1, path, dir_part_keys) - if futures_list: - futures.wait(futures_list) - - def _parse_partition(self, dirname): - if self.partition_scheme == 'hive': - return _parse_hive_partition(dirname) - else: - raise NotImplementedError('partition schema: {}' - .format(self.partition_scheme)) - - def _push_pieces(self, files, part_keys): - self.pieces.extend([ - ParquetDatasetPiece._create(path, partition_keys=part_keys, - open_file_func=self.open_file_func) - for path in files - ]) - - -def _parse_hive_partition(value): - if '=' not in value: - raise ValueError('Directory name did not appear to be a ' - 'partition: {}'.format(value)) - return value.split('=', 1) - - -def _is_private_directory(x): - _, tail = os.path.split(x) - return (tail.startswith('_') or tail.startswith('.')) and '=' not in tail - - -def _path_split(path, sep): - i = path.rfind(sep) + 1 - head, tail = path[:i], path[i:] - head = head.rstrip(sep) - return head, tail - - -EXCLUDED_PARQUET_PATHS = {'_SUCCESS'} - - -class _ParquetDatasetMetadata: - __slots__ = ('fs', 'memory_map', 'read_dictionary', 'common_metadata', - 'buffer_size') - - -def _open_dataset_file(dataset, path, meta=None): - if (dataset.fs is not None and - not isinstance(dataset.fs, legacyfs.LocalFileSystem)): - path = dataset.fs.open(path, mode='rb') - return ParquetFile( - path, - metadata=meta, - memory_map=dataset.memory_map, - read_dictionary=dataset.read_dictionary, - common_metadata=dataset.common_metadata, - buffer_size=dataset.buffer_size - ) - - -_DEPR_MSG = ( - "'{}' attribute is deprecated as of pyarrow 5.0.0 and will be removed " - "in a future version.{}" -) - - -_read_docstring_common = """\ -read_dictionary : list, default None - List of names or column paths (for nested types) to read directly - as DictionaryArray. Only supported for BYTE_ARRAY storage. To read - a flat column as dictionary-encoded pass the column name. For - nested types, you must pass the full column "path", which could be - something like level1.level2.list.item. 
Refer to the Parquet - file's schema to obtain the paths. -memory_map : bool, default False - If the source is a file path, use a memory map to read file, which can - improve performance in some environments. -buffer_size : int, default 0 - If positive, perform read buffering when deserializing individual - column chunks. Otherwise IO calls are unbuffered. -partitioning : pyarrow.dataset.Partitioning or str or list of str, \ -default "hive" - The partitioning scheme for a partitioned dataset. The default of "hive" - assumes directory names with key=value pairs like "/year=2009/month=11". - In addition, a scheme like "/2009/11" is also supported, in which case - you need to specify the field names or a full schema. See the - ``pyarrow.dataset.partitioning()`` function for more details.""" - -_parquet_dataset_example = """\ -Generate an example PyArrow Table and write it to a partitioned dataset: - ->>> import pyarrow as pa ->>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], -... 'n_legs': [2, 2, 4, 4, 5, 100], -... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", -... "Brittle stars", "Centipede"]}) - ->>> import pyarrow.parquet as pq ->>> pq.write_to_dataset(table, root_path='dataset_name', -... partition_cols=['year'], -... use_legacy_dataset=False) - -create a ParquetDataset object from the dataset source: - ->>> dataset = pq.ParquetDataset('dataset_name/', use_legacy_dataset=False) - -and read the data: - ->>> dataset.read().to_pandas() - n_legs animal year -0 5 Brittle stars 2019 -1 2 Flamingo 2020 -2 4 Dog 2021 -3 100 Centipede 2021 -4 2 Parrot 2022 -5 4 Horse 2022 - -create a ParquetDataset object with filter: - ->>> dataset = pq.ParquetDataset('dataset_name/', use_legacy_dataset=False, -... filters=[('n_legs','=',4)]) ->>> dataset.read().to_pandas() - n_legs animal year -0 4 Dog 2021 -1 4 Horse 2022 -""" - - -class ParquetDataset: - - __doc__ = """ -Encapsulates details of reading a complete Parquet dataset possibly -consisting of multiple files and partitions in subdirectories. - -Parameters ----------- -path_or_paths : str or List[str] - A directory name, single file name, or list of file names. -filesystem : FileSystem, default None - If nothing passed, will be inferred based on path. - Path will try to be found in the local on-disk filesystem otherwise - it will be parsed as an URI to determine the filesystem. -metadata : pyarrow.parquet.FileMetaData - Use metadata obtained elsewhere to validate file schemas. -schema : pyarrow.parquet.Schema - Use schema obtained elsewhere to validate file schemas. Alternative to - metadata parameter. -split_row_groups : bool, default False - Divide files into pieces for each row group in the file. -validate_schema : bool, default True - Check that individual file schemas are all the same / compatible. -filters : List[Tuple] or List[List[Tuple]] or None (default) - Rows which do not match the filter predicate will be removed from scanned - data. Partition keys embedded in a nested directory structure will be - exploited to avoid loading files at all if they contain no matching rows. - If `use_legacy_dataset` is True, filters can only reference partition - keys and only a hive-style directory structure is supported. When - setting `use_legacy_dataset` to False, also within-file level filtering - and different partitioning schemes are supported. - - {1} -metadata_nthreads : int, default 1 - How many threads to allow the thread pool which is used to read the - dataset metadata. 
Increasing this is helpful to read partitioned - datasets. -{0} -use_legacy_dataset : bool, default True - Set to False to enable the new code path (using the - new Arrow Dataset API). Among other things, this allows to pass - `filters` for all columns and not only the partition keys, enables - different partitioning schemes, etc. -pre_buffer : bool, default True - Coalesce and issue file reads in parallel to improve performance on - high-latency filesystems (e.g. S3). If True, Arrow will use a - background I/O thread pool. This option is only supported for - use_legacy_dataset=False. If using a filesystem layer that itself - performs readahead (e.g. fsspec's S3FS), disable readahead for best - results. -coerce_int96_timestamp_unit : str, default None. - Cast timestamps that are stored in INT96 format to a particular resolution - (e.g. 'ms'). Setting to None is equivalent to 'ns' and therefore INT96 - timestamps will be inferred as timestamps in nanoseconds. -thrift_string_size_limit : int, default None - If not None, override the maximum total string size allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. -thrift_container_size_limit : int, default None - If not None, override the maximum total size of containers allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - -Examples --------- -{2} -""".format(_read_docstring_common, _DNF_filter_doc, _parquet_dataset_example) - - def __new__(cls, path_or_paths=None, filesystem=None, schema=None, - metadata=None, split_row_groups=False, validate_schema=True, - filters=None, metadata_nthreads=None, read_dictionary=None, - memory_map=False, buffer_size=0, partitioning="hive", - use_legacy_dataset=None, pre_buffer=True, - coerce_int96_timestamp_unit=None, - thrift_string_size_limit=None, - thrift_container_size_limit=None): - if use_legacy_dataset is None: - # if a new filesystem is passed -> default to new implementation - if isinstance(filesystem, FileSystem): - use_legacy_dataset = False - # otherwise the default is still True - else: - use_legacy_dataset = True - - if not use_legacy_dataset: - return _ParquetDatasetV2( - path_or_paths, filesystem=filesystem, - filters=filters, - partitioning=partitioning, - read_dictionary=read_dictionary, - memory_map=memory_map, - buffer_size=buffer_size, - pre_buffer=pre_buffer, - coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, - # unsupported keywords - schema=schema, metadata=metadata, - split_row_groups=split_row_groups, - validate_schema=validate_schema, - metadata_nthreads=metadata_nthreads, - thrift_string_size_limit=thrift_string_size_limit, - thrift_container_size_limit=thrift_container_size_limit, - ) - self = object.__new__(cls) - return self - - def __init__(self, path_or_paths, filesystem=None, schema=None, - metadata=None, split_row_groups=False, validate_schema=True, - filters=None, metadata_nthreads=None, read_dictionary=None, - memory_map=False, buffer_size=0, partitioning="hive", - use_legacy_dataset=True, pre_buffer=True, - coerce_int96_timestamp_unit=None, - thrift_string_size_limit=None, - thrift_container_size_limit=None): - if partitioning != "hive": - raise ValueError( - 'Only "hive" for hive-like partitioning is supported when ' - 'using use_legacy_dataset=True') - if metadata_nthreads is not None: - warnings.warn( - "Specifying the 'metadata_nthreads' argument is deprecated as " - "of pyarrow 8.0.0, and the argument will be removed in a " - "future version", - 
FutureWarning, stacklevel=2, - ) - else: - metadata_nthreads = 1 - - self._ds_metadata = _ParquetDatasetMetadata() - a_path = path_or_paths - if isinstance(a_path, list): - a_path = a_path[0] - - self._ds_metadata.fs, _ = _get_filesystem_and_path(filesystem, a_path) - if isinstance(path_or_paths, list): - self.paths = [_parse_uri(path) for path in path_or_paths] - else: - self.paths = _parse_uri(path_or_paths) - - self._ds_metadata.read_dictionary = read_dictionary - self._ds_metadata.memory_map = memory_map - self._ds_metadata.buffer_size = buffer_size - - (self._pieces, - self._partitions, - self._common_metadata_path, - self._metadata_path) = _make_manifest( - path_or_paths, self._fs, metadata_nthreads=metadata_nthreads, - open_file_func=partial(_open_dataset_file, self._ds_metadata) - ) - - if self._common_metadata_path is not None: - with self._fs.open(self._common_metadata_path) as f: - self._ds_metadata.common_metadata = read_metadata( - f, - memory_map=memory_map - ) - else: - self._ds_metadata.common_metadata = None - - if metadata is not None: - warnings.warn( - "Specifying the 'metadata' argument with 'use_legacy_dataset=" - "True' is deprecated as of pyarrow 8.0.0.", - FutureWarning, stacklevel=2) - - if metadata is None and self._metadata_path is not None: - with self._fs.open(self._metadata_path) as f: - self._metadata = read_metadata(f, memory_map=memory_map) - else: - self._metadata = metadata - - if schema is not None: - warnings.warn( - "Specifying the 'schema' argument with 'use_legacy_dataset=" - "True' is deprecated as of pyarrow 8.0.0. You can still " - "specify it in combination with 'use_legacy_dataet=False', " - "but in that case you need to specify a pyarrow.Schema " - "instead of a ParquetSchema.", - FutureWarning, stacklevel=2) - self._schema = schema - - self.split_row_groups = split_row_groups - - if split_row_groups: - raise NotImplementedError("split_row_groups not yet implemented") - - if filters is not None: - filters = _check_filters(filters) - self._filter(filters) - - if validate_schema: - self.validate_schemas() - - def equals(self, other): - if not isinstance(other, ParquetDataset): - raise TypeError('`other` must be an instance of ParquetDataset') - - if self._fs.__class__ != other._fs.__class__: - return False - for prop in ('paths', '_pieces', '_partitions', - '_common_metadata_path', '_metadata_path', - '_common_metadata', '_metadata', '_schema', - 'split_row_groups'): - if getattr(self, prop) != getattr(other, prop): - return False - for prop in ('memory_map', 'buffer_size'): - if ( - getattr(self._ds_metadata, prop) != - getattr(other._ds_metadata, prop) - ): - return False - - return True - - def __eq__(self, other): - try: - return self.equals(other) - except TypeError: - return NotImplemented - - def validate_schemas(self): - if self._metadata is None and self._schema is None: - if self._common_metadata is not None: - self._schema = self._common_metadata.schema - else: - self._schema = self._pieces[0].get_metadata().schema - elif self._schema is None: - self._schema = self._metadata.schema - - # Verify schemas are all compatible - dataset_schema = self._schema.to_arrow_schema() - # Exclude the partition columns from the schema, they are provided - # by the path, not the DatasetPiece - if self._partitions is not None: - for partition_name in self._partitions.partition_names: - if dataset_schema.get_field_index(partition_name) != -1: - field_idx = dataset_schema.get_field_index(partition_name) - dataset_schema = dataset_schema.remove(field_idx) 
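# Small illustration of the pyarrow.Schema manipulation used just above to drop
# partition columns before comparing per-file schemas (field names here are
# illustrative, not taken from a real dataset).
import pyarrow as pa

dataset_schema = pa.schema([('n_legs', pa.int64()),
                            ('animal', pa.string()),
                            ('year', pa.int32())])
idx = dataset_schema.get_field_index('year')
if idx != -1:
    dataset_schema = dataset_schema.remove(idx)
print(dataset_schema.names)  # ['n_legs', 'animal']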
- - for piece in self._pieces: - file_metadata = piece.get_metadata() - file_schema = file_metadata.schema.to_arrow_schema() - if not dataset_schema.equals(file_schema, check_metadata=False): - raise ValueError('Schema in {!s} was different. \n' - '{!s}\n\nvs\n\n{!s}' - .format(piece, file_schema, - dataset_schema)) - - def read(self, columns=None, use_threads=True, use_pandas_metadata=False): - """ - Read multiple Parquet files as a single pyarrow.Table. - - Parameters - ---------- - columns : List[str] - Names of columns to read from the file. - use_threads : bool, default True - Perform multi-threaded column reads - use_pandas_metadata : bool, default False - Passed through to each dataset piece. - - Returns - ------- - pyarrow.Table - Content of the file as a table (of columns). - - Examples - -------- - Generate an example dataset: - - >>> import pyarrow as pa - >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path='dataset_name_read', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_name_read/', - ... use_legacy_dataset=False) - - Read multiple Parquet files as a single pyarrow.Table: - - >>> dataset.read(columns=["n_legs"]) - pyarrow.Table - n_legs: int64 - ---- - n_legs: [[5],[2],[4,100],[2,4]] - """ - tables = [] - for piece in self._pieces: - table = piece.read(columns=columns, use_threads=use_threads, - partitions=self._partitions, - use_pandas_metadata=use_pandas_metadata) - tables.append(table) - - all_data = lib.concat_tables(tables) - - if use_pandas_metadata: - # We need to ensure that this metadata is set in the Table's schema - # so that Table.to_pandas will construct pandas.DataFrame with the - # right index - common_metadata = self._get_common_pandas_metadata() - current_metadata = all_data.schema.metadata or {} - - if common_metadata and b'pandas' not in current_metadata: - all_data = all_data.replace_schema_metadata({ - b'pandas': common_metadata}) - - return all_data - - def read_pandas(self, **kwargs): - """ - Read dataset including pandas metadata, if any. Other arguments passed - through to ParquetDataset.read, see docstring for further details. - - Parameters - ---------- - **kwargs : optional - All additional options to pass to the reader. - - Returns - ------- - pyarrow.Table - Content of the file as a table (of columns). - - Examples - -------- - Generate an example PyArrow Table and write it to a partitioned - dataset: - - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> table = pa.Table.from_pandas(df) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, 'table.parquet') - >>> dataset = pq.ParquetDataset('table.parquet', - ... 
use_legacy_dataset=False) - - Read dataset including pandas metadata: - - >>> dataset.read_pandas(columns=["n_legs"]) - pyarrow.Table - n_legs: int64 - ---- - n_legs: [[2,2,4,4,5,100]] - - Select pandas metadata: - - >>> dataset.read_pandas(columns=["n_legs"]).schema.pandas_metadata - {'index_columns': [{'kind': 'range', 'name': None, 'start': 0, ...} - """ - return self.read(use_pandas_metadata=True, **kwargs) - - def _get_common_pandas_metadata(self): - if self._common_metadata is None: - return None - - keyvalues = self._common_metadata.metadata - return keyvalues.get(b'pandas', None) - - def _filter(self, filters): - accepts_filter = self._partitions.filter_accepts_partition - - def one_filter_accepts(piece, filter): - return all(accepts_filter(part_key, filter, level) - for level, part_key in enumerate(piece.partition_keys)) - - def all_filters_accept(piece): - return any(all(one_filter_accepts(piece, f) for f in conjunction) - for conjunction in filters) - - self._pieces = [p for p in self._pieces if all_filters_accept(p)] - - @property - def pieces(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format( - "ParquetDataset.pieces", - " Specify 'use_legacy_dataset=False' while constructing the " - "ParquetDataset, and then use the '.fragments' attribute " - "instead."), - FutureWarning, stacklevel=2) - return self._pieces - - @property - def partitions(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format( - "ParquetDataset.partitions", - " Specify 'use_legacy_dataset=False' while constructing the " - "ParquetDataset, and then use the '.partitioning' attribute " - "instead."), - FutureWarning, stacklevel=2) - return self._partitions - - @property - def schema(self): - warnings.warn( - _DEPR_MSG.format( - "ParquetDataset.schema", - " Specify 'use_legacy_dataset=False' while constructing the " - "ParquetDataset, and then use the '.schema' attribute " - "instead (which will return an Arrow schema instead of a " - "Parquet schema)."), - FutureWarning, stacklevel=2) - return self._schema - - @property - def memory_map(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format("ParquetDataset.memory_map", ""), - FutureWarning, stacklevel=2) - return self._ds_metadata.memory_map - - @property - def read_dictionary(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format("ParquetDataset.read_dictionary", ""), - FutureWarning, stacklevel=2) - return self._ds_metadata.read_dictionary - - @property - def buffer_size(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format("ParquetDataset.buffer_size", ""), - FutureWarning, stacklevel=2) - return self._ds_metadata.buffer_size - - _fs = property( - operator.attrgetter('_ds_metadata.fs') - ) - - @property - def fs(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format( - "ParquetDataset.fs", - " Specify 'use_legacy_dataset=False' while constructing the " - "ParquetDataset, and then use the '.filesystem' attribute " - "instead."), - FutureWarning, stacklevel=2) - return self._ds_metadata.fs - - @property - def metadata(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format("ParquetDataset.metadata", ""), - FutureWarning, stacklevel=2) - return self._metadata - - @property - def metadata_path(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format("ParquetDataset.metadata_path", ""), - FutureWarning, stacklevel=2) - return self._metadata_path - - @property - def common_metadata_path(self): - """ - DEPRECATED - """ - warnings.warn( - 
_DEPR_MSG.format("ParquetDataset.common_metadata_path", ""), - FutureWarning, stacklevel=2) - return self._common_metadata_path - - _common_metadata = property( - operator.attrgetter('_ds_metadata.common_metadata') - ) - - @property - def common_metadata(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format("ParquetDataset.common_metadata", ""), - FutureWarning, stacklevel=2) - return self._ds_metadata.common_metadata - - @property - def fragments(self): - """ - A list of the Dataset source fragments or pieces with absolute - file paths. To use this property set 'use_legacy_dataset=False' - while constructing ParquetDataset object. - - Examples - -------- - Generate an example dataset: - - >>> import pyarrow as pa - >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path='dataset_name_fragments', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_name_fragments/', - ... use_legacy_dataset=False) - - List the fragments: - - >>> dataset.fragments - [>> import pyarrow as pa - >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path='dataset_name_files', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_name_files/', - ... use_legacy_dataset=False) - - List the files: - - >>> dataset.files - ['dataset_name_files/year=2019/...-0.parquet', ... - """ - raise NotImplementedError( - "To use this property set 'use_legacy_dataset=False' while " - "constructing the ParquetDataset") - - @property - def filesystem(self): - """ - The filesystem type of the Dataset source. - To use this property set 'use_legacy_dataset=False' - while constructing ParquetDataset object. - """ - raise NotImplementedError( - "To use this property set 'use_legacy_dataset=False' while " - "constructing the ParquetDataset") - - @property - def partitioning(self): - """ - The partitioning of the Dataset source, if discovered. - To use this property set 'use_legacy_dataset=False' - while constructing ParquetDataset object. 
- """ - raise NotImplementedError( - "To use this property set 'use_legacy_dataset=False' while " - "constructing the ParquetDataset") - - -def _make_manifest(path_or_paths, fs, pathsep='/', metadata_nthreads=1, - open_file_func=None): - partitions = None - common_metadata_path = None - metadata_path = None - - if isinstance(path_or_paths, list) and len(path_or_paths) == 1: - # Dask passes a directory as a list of length 1 - path_or_paths = path_or_paths[0] - - if _is_path_like(path_or_paths) and fs.isdir(path_or_paths): - manifest = ParquetManifest(path_or_paths, filesystem=fs, - open_file_func=open_file_func, - pathsep=getattr(fs, "pathsep", "/"), - metadata_nthreads=metadata_nthreads) - common_metadata_path = manifest.common_metadata_path - metadata_path = manifest.metadata_path - pieces = manifest.pieces - partitions = manifest.partitions - else: - if not isinstance(path_or_paths, list): - path_or_paths = [path_or_paths] - - # List of paths - if len(path_or_paths) == 0: - raise ValueError('Must pass at least one file path') - - pieces = [] - for path in path_or_paths: - if not fs.isfile(path): - raise OSError('Passed non-file path: {}' - .format(path)) - piece = ParquetDatasetPiece._create( - path, open_file_func=open_file_func) - pieces.append(piece) - - return pieces, partitions, common_metadata_path, metadata_path - - -def _is_local_file_system(fs): - return isinstance(fs, LocalFileSystem) or isinstance( - fs, legacyfs.LocalFileSystem - ) - - -class _ParquetDatasetV2: - """ - ParquetDataset shim using the Dataset API under the hood. - - Examples - -------- - Generate an example PyArrow Table and write it to a partitioned dataset: - - >>> import pyarrow as pa - >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path='dataset_v2', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - - create a ParquetDataset object from the dataset source: - - >>> dataset = pq.ParquetDataset('dataset_v2/', use_legacy_dataset=False) - - and read the data: - - >>> dataset.read().to_pandas() - n_legs animal year - 0 5 Brittle stars 2019 - 1 2 Flamingo 2020 - 2 4 Dog 2021 - 3 100 Centipede 2021 - 4 2 Parrot 2022 - 5 4 Horse 2022 - - create a ParquetDataset object with filter: - - >>> dataset = pq.ParquetDataset('dataset_v2/', - ... filters=[('n_legs','=',4)], - ... 
use_legacy_dataset=False) - >>> dataset.read().to_pandas() - n_legs animal year - 0 4 Dog 2021 - 1 4 Horse 2022 - """ - - def __init__(self, path_or_paths, filesystem=None, *, filters=None, - partitioning="hive", read_dictionary=None, buffer_size=None, - memory_map=False, ignore_prefixes=None, pre_buffer=True, - coerce_int96_timestamp_unit=None, schema=None, - decryption_properties=None, thrift_string_size_limit=None, - thrift_container_size_limit=None, - **kwargs): - import pyarrow.dataset as ds - - # Raise error for not supported keywords - for keyword, default in [ - ("metadata", None), ("split_row_groups", False), - ("validate_schema", True), ("metadata_nthreads", None)]: - if keyword in kwargs and kwargs[keyword] is not default: - raise ValueError( - "Keyword '{0}' is not yet supported with the new " - "Dataset API".format(keyword)) - - # map format arguments - read_options = { - "pre_buffer": pre_buffer, - "coerce_int96_timestamp_unit": coerce_int96_timestamp_unit, - "thrift_string_size_limit": thrift_string_size_limit, - "thrift_container_size_limit": thrift_container_size_limit, - } - if buffer_size: - read_options.update(use_buffered_stream=True, - buffer_size=buffer_size) - if read_dictionary is not None: - read_options.update(dictionary_columns=read_dictionary) - - if decryption_properties is not None: - read_options.update(decryption_properties=decryption_properties) - - # map filters to Expressions - self._filters = filters - self._filter_expression = filters and _filters_to_expression(filters) - - # map old filesystems to new one - if filesystem is not None: - filesystem = _ensure_filesystem( - filesystem, use_mmap=memory_map) - elif filesystem is None and memory_map: - # if memory_map is specified, assume local file system (string - # path can in principle be URI for any filesystem) - filesystem = LocalFileSystem(use_mmap=memory_map) - - # This needs to be checked after _ensure_filesystem, because that - # handles the case of an fsspec LocalFileSystem - if ( - hasattr(path_or_paths, "__fspath__") and - filesystem is not None and - not _is_local_file_system(filesystem) - ): - raise TypeError( - "Path-like objects with __fspath__ must only be used with " - f"local file systems, not {type(filesystem)}" - ) - - # check for single fragment dataset - single_file = None - if not isinstance(path_or_paths, list): - if _is_path_like(path_or_paths): - path_or_paths = _stringify_path(path_or_paths) - if filesystem is None: - # path might be a URI describing the FileSystem as well - try: - filesystem, path_or_paths = FileSystem.from_uri( - path_or_paths) - except ValueError: - filesystem = LocalFileSystem(use_mmap=memory_map) - if filesystem.get_file_info(path_or_paths).is_file: - single_file = path_or_paths - else: - single_file = path_or_paths - - parquet_format = ds.ParquetFileFormat(**read_options) - - if single_file is not None: - fragment = parquet_format.make_fragment(single_file, filesystem) - - self._dataset = ds.FileSystemDataset( - [fragment], schema=schema or fragment.physical_schema, - format=parquet_format, - filesystem=fragment.filesystem - ) - return - - # check partitioning to enable dictionary encoding - if partitioning == "hive": - partitioning = ds.HivePartitioning.discover( - infer_dictionary=True) - - self._dataset = ds.dataset(path_or_paths, filesystem=filesystem, - schema=schema, format=parquet_format, - partitioning=partitioning, - ignore_prefixes=ignore_prefixes) - - @property - def schema(self): - """ - Schema of the Dataset. 
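# A sketch (assuming the 'dataset_v2' directory from the example above exists)
# of the keyword mapping done in __init__ above: buffer_size enables buffered
# stream reads and read_dictionary maps to the format's dictionary_columns
# read option.
import pyarrow.parquet as pq

dataset = pq.ParquetDataset('dataset_v2/', use_legacy_dataset=False,
                            buffer_size=64 * 1024,
                            read_dictionary=['animal'])
table = dataset.read(columns=['animal'])
print(table.column('animal').type)  # dictionary<values=string, ...>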
- - Examples - -------- - Generate an example dataset: - - >>> import pyarrow as pa - >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path='dataset_v2_schema', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_v2_schema/', - ... use_legacy_dataset=False) - - Read the schema: - - >>> dataset.schema - n_legs: int64 - animal: string - year: dictionary - """ - return self._dataset.schema - - def read(self, columns=None, use_threads=True, use_pandas_metadata=False): - """ - Read (multiple) Parquet files as a single pyarrow.Table. - - Parameters - ---------- - columns : List[str] - Names of columns to read from the dataset. The partition fields - are not automatically included (in contrast to when setting - ``use_legacy_dataset=True``). - use_threads : bool, default True - Perform multi-threaded column reads. - use_pandas_metadata : bool, default False - If True and file has custom pandas schema metadata, ensure that - index columns are also loaded. - - Returns - ------- - pyarrow.Table - Content of the file as a table (of columns). - - Examples - -------- - Generate an example dataset: - - >>> import pyarrow as pa - >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path='dataset_v2_read', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_v2_read/', - ... use_legacy_dataset=False) - - Read the dataset: - - >>> dataset.read(columns=["n_legs"]) - pyarrow.Table - n_legs: int64 - ---- - n_legs: [[5],[2],[4,100],[2,4]] - """ - # if use_pandas_metadata, we need to include index columns in the - # column selection, to be able to restore those in the pandas DataFrame - metadata = self.schema.metadata - if columns is not None and use_pandas_metadata: - if metadata and b'pandas' in metadata: - # RangeIndex can be represented as dict instead of column name - index_columns = [ - col for col in _get_pandas_index_columns(metadata) - if not isinstance(col, dict) - ] - columns = ( - list(columns) + list(set(index_columns) - set(columns)) - ) - - table = self._dataset.to_table( - columns=columns, filter=self._filter_expression, - use_threads=use_threads - ) - - # if use_pandas_metadata, restore the pandas metadata (which gets - # lost if doing a specific `columns` selection in to_table) - if use_pandas_metadata: - if metadata and b"pandas" in metadata: - new_metadata = table.schema.metadata or {} - new_metadata.update({b"pandas": metadata[b"pandas"]}) - table = table.replace_schema_metadata(new_metadata) - - return table - - def read_pandas(self, **kwargs): - """ - Read dataset including pandas metadata, if any. Other arguments passed - through to ParquetDataset.read, see docstring for further details. - - Examples - -------- - Generate an example parquet file: - - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... 
"Brittle stars", "Centipede"]}) - >>> table = pa.Table.from_pandas(df) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, 'table_V2.parquet') - >>> dataset = pq.ParquetDataset('table_V2.parquet', - ... use_legacy_dataset=False) - - Read the dataset with pandas metadata: - - >>> dataset.read_pandas(columns=["n_legs"]) - pyarrow.Table - n_legs: int64 - ---- - n_legs: [[2,2,4,4,5,100]] - - >>> dataset.read_pandas(columns=["n_legs"]).schema.pandas_metadata - {'index_columns': [{'kind': 'range', 'name': None, 'start': 0, ...} - """ - return self.read(use_pandas_metadata=True, **kwargs) - - @property - def pieces(self): - warnings.warn( - _DEPR_MSG.format("ParquetDataset.pieces", - " Use the '.fragments' attribute instead"), - FutureWarning, stacklevel=2) - return list(self._dataset.get_fragments()) - - @property - def fragments(self): - """ - A list of the Dataset source fragments or pieces with absolute - file paths. - - Examples - -------- - Generate an example dataset: - - >>> import pyarrow as pa - >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path='dataset_v2_fragments', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_v2_fragments/', - ... use_legacy_dataset=False) - - List the fragments: - - >>> dataset.fragments - [>> import pyarrow as pa - >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path='dataset_v2_files', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_v2_files/', - ... use_legacy_dataset=False) - - List the files: - - >>> dataset.files - ['dataset_v2_files/year=2019/...-0.parquet', ... - """ - return self._dataset.files - - @property - def filesystem(self): - """ - The filesystem type of the Dataset source. - """ - return self._dataset.filesystem - - @property - def partitioning(self): - """ - The partitioning of the Dataset source, if discovered. - """ - return self._dataset.partitioning - - -_read_table_docstring = """ -{0} - -Parameters ----------- -source : str, pyarrow.NativeFile, or file-like object - If a string passed, can be a single file name or directory name. For - file-like objects, only read a single file. Use pyarrow.BufferReader to - read a file contained in a bytes or buffer-like object. -columns : list - If not None, only these columns will be read from the file. A column - name may be a prefix of a nested field, e.g. 'a' will select 'a.b', - 'a.c', and 'a.d.e'. If empty, no columns will be read. Note - that the table will still have the correct num_rows set despite having - no columns. -use_threads : bool, default True - Perform multi-threaded column reads. -metadata : FileMetaData - If separately computed -schema : Schema, optional - Optionally provide the Schema for the parquet dataset, in which case it - will not be inferred from the source. -{1} -use_legacy_dataset : bool, default False - By default, `read_table` uses the new Arrow Datasets API since - pyarrow 1.0.0. 
Among other things, this allows to pass `filters` - for all columns and not only the partition keys, enables - different partitioning schemes, etc. - Set to True to use the legacy behaviour (this option is deprecated, - and the legacy implementation will be removed in a future version). -ignore_prefixes : list, optional - Files matching any of these prefixes will be ignored by the - discovery process if use_legacy_dataset=False. - This is matched to the basename of a path. - By default this is ['.', '_']. - Note that discovery happens only if a directory is passed as source. -filesystem : FileSystem, default None - If nothing passed, will be inferred based on path. - Path will try to be found in the local on-disk filesystem otherwise - it will be parsed as an URI to determine the filesystem. -filters : List[Tuple] or List[List[Tuple]] or None (default) - Rows which do not match the filter predicate will be removed from scanned - data. Partition keys embedded in a nested directory structure will be - exploited to avoid loading files at all if they contain no matching rows. - If `use_legacy_dataset` is True, filters can only reference partition - keys and only a hive-style directory structure is supported. When - setting `use_legacy_dataset` to False, also within-file level filtering - and different partitioning schemes are supported. - - {3} -pre_buffer : bool, default True - Coalesce and issue file reads in parallel to improve performance on - high-latency filesystems (e.g. S3). If True, Arrow will use a - background I/O thread pool. This option is only supported for - use_legacy_dataset=False. If using a filesystem layer that itself - performs readahead (e.g. fsspec's S3FS), disable readahead for best - results. -coerce_int96_timestamp_unit : str, default None. - Cast timestamps that are stored in INT96 format to a particular - resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' - and therefore INT96 timestamps will be inferred as timestamps - in nanoseconds. -decryption_properties : FileDecryptionProperties or None - File-level decryption properties. - The decryption properties can be created using - ``CryptoFactory.file_decryption_properties()``. -thrift_string_size_limit : int, default None - If not None, override the maximum total string size allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. -thrift_container_size_limit : int, default None - If not None, override the maximum total size of containers allocated - when decoding Thrift structures. The default limit should be - sufficient for most Parquet files. - -Returns -------- -{2} - -{4} -""" - -_read_table_example = """\ - -Examples --------- - -Generate an example PyArrow Table and write it to a partitioned dataset: - ->>> import pyarrow as pa ->>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], -... 'n_legs': [2, 2, 4, 4, 5, 100], -... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", -... "Brittle stars", "Centipede"]}) ->>> import pyarrow.parquet as pq ->>> pq.write_to_dataset(table, root_path='dataset_name_2', -... 
partition_cols=['year']) - -Read the data: - ->>> pq.read_table('dataset_name_2').to_pandas() - n_legs animal year -0 5 Brittle stars 2019 -1 2 Flamingo 2020 -2 4 Dog 2021 -3 100 Centipede 2021 -4 2 Parrot 2022 -5 4 Horse 2022 - - -Read only a subset of columns: - ->>> pq.read_table('dataset_name_2', columns=["n_legs", "animal"]) -pyarrow.Table -n_legs: int64 -animal: string ----- -n_legs: [[5],[2],[4,100],[2,4]] -animal: [["Brittle stars"],["Flamingo"],["Dog","Centipede"],["Parrot","Horse"]] - -Read a subset of columns and read one column as DictionaryArray: - ->>> pq.read_table('dataset_name_2', columns=["n_legs", "animal"], -... read_dictionary=["animal"]) -pyarrow.Table -n_legs: int64 -animal: dictionary ----- -n_legs: [[5],[2],[4,100],[2,4]] -animal: [ -- dictionary: -["Brittle stars"] -- indices: -[0], -- dictionary: -["Flamingo"] -- indices: -[0], -- dictionary: -["Dog","Centipede"] -- indices: -[0,1], -- dictionary: -["Parrot","Horse"] -- indices: -[0,1]] - -Read the table with filter: - ->>> pq.read_table('dataset_name_2', columns=["n_legs", "animal"], -... filters=[('n_legs','<',4)]).to_pandas() - n_legs animal -0 2 Flamingo -1 2 Parrot - -Read data from a single Parquet file: - ->>> pq.write_table(table, 'example.parquet') ->>> pq.read_table('dataset_name_2').to_pandas() - n_legs animal year -0 5 Brittle stars 2019 -1 2 Flamingo 2020 -2 4 Dog 2021 -3 100 Centipede 2021 -4 2 Parrot 2022 -5 4 Horse 2022 -""" - - -def read_table(source, *, columns=None, use_threads=True, metadata=None, - schema=None, use_pandas_metadata=False, memory_map=False, - read_dictionary=None, filesystem=None, filters=None, - buffer_size=0, partitioning="hive", use_legacy_dataset=False, - ignore_prefixes=None, pre_buffer=True, - coerce_int96_timestamp_unit=None, - decryption_properties=None, thrift_string_size_limit=None, - thrift_container_size_limit=None): - if not use_legacy_dataset: - if metadata is not None: - raise ValueError( - "The 'metadata' keyword is no longer supported with the new " - "datasets-based implementation. Specify " - "'use_legacy_dataset=True' to temporarily recover the old " - "behaviour." 
- ) - try: - dataset = _ParquetDatasetV2( - source, - schema=schema, - filesystem=filesystem, - partitioning=partitioning, - memory_map=memory_map, - read_dictionary=read_dictionary, - buffer_size=buffer_size, - filters=filters, - ignore_prefixes=ignore_prefixes, - pre_buffer=pre_buffer, - coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, - thrift_string_size_limit=thrift_string_size_limit, - thrift_container_size_limit=thrift_container_size_limit, - ) - except ImportError: - # fall back on ParquetFile for simple cases when pyarrow.dataset - # module is not available - if filters is not None: - raise ValueError( - "the 'filters' keyword is not supported when the " - "pyarrow.dataset module is not available" - ) - if partitioning != "hive": - raise ValueError( - "the 'partitioning' keyword is not supported when the " - "pyarrow.dataset module is not available" - ) - if schema is not None: - raise ValueError( - "the 'schema' argument is not supported when the " - "pyarrow.dataset module is not available" - ) - filesystem, path = _resolve_filesystem_and_path(source, filesystem) - if filesystem is not None: - source = filesystem.open_input_file(path) - # TODO test that source is not a directory or a list - dataset = ParquetFile( - source, metadata=metadata, read_dictionary=read_dictionary, - memory_map=memory_map, buffer_size=buffer_size, - pre_buffer=pre_buffer, - coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, - decryption_properties=decryption_properties, - thrift_string_size_limit=thrift_string_size_limit, - thrift_container_size_limit=thrift_container_size_limit, - ) - - return dataset.read(columns=columns, use_threads=use_threads, - use_pandas_metadata=use_pandas_metadata) - - warnings.warn( - "Passing 'use_legacy_dataset=True' to get the legacy behaviour is " - "deprecated as of pyarrow 8.0.0, and the legacy implementation will " - "be removed in a future version.", - FutureWarning, stacklevel=2) - - if ignore_prefixes is not None: - raise ValueError( - "The 'ignore_prefixes' keyword is only supported when " - "use_legacy_dataset=False") - - if schema is not None: - raise ValueError( - "The 'schema' argument is only supported when " - "use_legacy_dataset=False") - - if _is_path_like(source): - pf = ParquetDataset( - source, metadata=metadata, memory_map=memory_map, - read_dictionary=read_dictionary, - buffer_size=buffer_size, - filesystem=filesystem, filters=filters, - partitioning=partitioning, - coerce_int96_timestamp_unit=coerce_int96_timestamp_unit - ) - else: - pf = ParquetFile( - source, metadata=metadata, - read_dictionary=read_dictionary, - memory_map=memory_map, - buffer_size=buffer_size, - coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, - decryption_properties=decryption_properties - ) - return pf.read(columns=columns, use_threads=use_threads, - use_pandas_metadata=use_pandas_metadata) - - -read_table.__doc__ = _read_table_docstring.format( - """Read a Table from Parquet format - -Note: starting with pyarrow 1.0, the default for `use_legacy_dataset` is -switched to False.""", - "\n".join((_read_docstring_common, - """use_pandas_metadata : bool, default False - If True and file has custom pandas schema metadata, ensure that - index columns are also loaded.""")), - """pyarrow.Table - Content of the file as a table (of columns)""", - _DNF_filter_doc, _read_table_example) - - -def read_pandas(source, columns=None, **kwargs): - return read_table( - source, columns=columns, use_pandas_metadata=True, **kwargs - ) - - -read_pandas.__doc__ = 
_read_table_docstring.format( - 'Read a Table from Parquet format, also reading DataFrame\n' - 'index values if known in the file metadata', - "\n".join((_read_docstring_common, - """**kwargs - additional options for :func:`read_table`""")), - """pyarrow.Table - Content of the file as a Table of Columns, including DataFrame - indexes as columns""", - _DNF_filter_doc, "") - - -def write_table(table, where, row_group_size=None, version='2.4', - use_dictionary=True, compression='snappy', - write_statistics=True, - use_deprecated_int96_timestamps=None, - coerce_timestamps=None, - allow_truncated_timestamps=False, - data_page_size=None, flavor=None, - filesystem=None, - compression_level=None, - use_byte_stream_split=False, - column_encoding=None, - data_page_version='1.0', - use_compliant_nested_type=False, - encryption_properties=None, - write_batch_size=None, - dictionary_pagesize_limit=None, - **kwargs): - row_group_size = kwargs.pop('chunk_size', row_group_size) - use_int96 = use_deprecated_int96_timestamps - try: - with ParquetWriter( - where, table.schema, - filesystem=filesystem, - version=version, - flavor=flavor, - use_dictionary=use_dictionary, - write_statistics=write_statistics, - coerce_timestamps=coerce_timestamps, - data_page_size=data_page_size, - allow_truncated_timestamps=allow_truncated_timestamps, - compression=compression, - use_deprecated_int96_timestamps=use_int96, - compression_level=compression_level, - use_byte_stream_split=use_byte_stream_split, - column_encoding=column_encoding, - data_page_version=data_page_version, - use_compliant_nested_type=use_compliant_nested_type, - encryption_properties=encryption_properties, - write_batch_size=write_batch_size, - dictionary_pagesize_limit=dictionary_pagesize_limit, - **kwargs) as writer: - writer.write_table(table, row_group_size=row_group_size) - except Exception: - if _is_path_like(where): - try: - os.remove(_stringify_path(where)) - except os.error: - pass - raise - - -_write_table_example = """\ -Generate an example PyArrow Table: - ->>> import pyarrow as pa ->>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], -... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", -... "Brittle stars", "Centipede"]}) - -and write the Table into Parquet file: - ->>> import pyarrow.parquet as pq ->>> pq.write_table(table, 'example.parquet') - -Defining row group size for the Parquet file: - ->>> pq.write_table(table, 'example.parquet', row_group_size=3) - -Defining row group compression (default is Snappy): - ->>> pq.write_table(table, 'example.parquet', compression='none') - -Defining row group compression and encoding per-column: - ->>> pq.write_table(table, 'example.parquet', -... compression={'n_legs': 'snappy', 'animal': 'gzip'}, -... use_dictionary=['n_legs', 'animal']) - -Defining column encoding per-column: - ->>> pq.write_table(table, 'example.parquet', -... column_encoding={'animal':'PLAIN'}, -... use_dictionary=False) -""" - -write_table.__doc__ = """ -Write a Table to Parquet format. - -Parameters ----------- -table : pyarrow.Table -where : string or pyarrow.NativeFile -row_group_size : int - Maximum size of each written row group. If None, the - row group size will be the minimum of the Table size - and 64 * 1024 * 1024. 
-{} -**kwargs : optional - Additional options for ParquetWriter - -Examples --------- -{} -""".format(_parquet_writer_arg_docs, _write_table_example) - - -def _mkdir_if_not_exists(fs, path): - if fs._isfilestore() and not fs.exists(path): - try: - fs.mkdir(path) - except OSError: - assert fs.exists(path) - - -def write_to_dataset(table, root_path, partition_cols=None, - partition_filename_cb=None, filesystem=None, - use_legacy_dataset=None, schema=None, - partitioning=None, basename_template=None, - use_threads=None, file_visitor=None, - existing_data_behavior=None, - **kwargs): - """Wrapper around dataset.write_dataset (when use_legacy_dataset=False) or - parquet.write_table (when use_legacy_dataset=True) for writing a Table to - Parquet format by partitions. - For each combination of partition columns and values, - subdirectories are created in the following - manner: - - root_dir/ - group1=value1 - group2=value1 - .parquet - group2=value2 - .parquet - group1=valueN - group2=value1 - .parquet - group2=valueN - .parquet - - Parameters - ---------- - table : pyarrow.Table - root_path : str, pathlib.Path - The root directory of the dataset - filesystem : FileSystem, default None - If nothing passed, will be inferred based on path. - Path will try to be found in the local on-disk filesystem otherwise - it will be parsed as an URI to determine the filesystem. - partition_cols : list, - Column names by which to partition the dataset. - Columns are partitioned in the order they are given - partition_filename_cb : callable, - A callback function that takes the partition key(s) as an argument - and allows you to override the partition filename. If nothing is - passed, the filename will consist of a uuid. - This option is only supported for use_legacy_dataset=True. - When use_legacy_dataset=None and this option is specified, - use_legacy_dataset will be set to True. - use_legacy_dataset : bool - Default is False. Set to True to use the legacy behaviour - (this option is deprecated, and the legacy implementation will be - removed in a future version). The legacy implementation still - supports the `partition_filename_cb` keyword but is less efficient - when using partition columns. - use_threads : bool, default True - Write files in parallel. If enabled, then maximum parallelism will be - used, as determined by the number of available CPU cores. - This option is only supported for use_legacy_dataset=False. - schema : Schema, optional - This option is only supported for use_legacy_dataset=False. - partitioning : Partitioning or list[str], optional - The partitioning scheme specified with the - ``pyarrow.dataset.partitioning()`` function or a list of field names. - When providing a list of field names, you can use - ``partitioning_flavor`` to drive which partitioning type should be - used. - This option is only supported for use_legacy_dataset=False. - basename_template : str, optional - A template string used to generate basenames of written data files. - The token '{i}' will be replaced with an automatically incremented - integer. If not specified, it defaults to "guid-{i}.parquet". - This option is only supported for use_legacy_dataset=False. - file_visitor : function - If set, this function will be called with a WrittenFile instance - for each file created during the call. This object will have both - a path attribute and a metadata attribute. - - The path attribute will be a string containing the path to - the created file. - - The metadata attribute will be the parquet metadata of the file.
- This metadata will have the file path attribute set and can be used - to build a _metadata file. The metadata attribute will be None if - the format is not parquet. - - Example visitor which simply collects the filenames created:: - - visited_paths = [] - - def file_visitor(written_file): - visited_paths.append(written_file.path) - This option is only supported for use_legacy_dataset=False. - existing_data_behavior : 'overwrite_or_ignore' | 'error' | \ -'delete_matching' - Controls how the dataset will handle data that already exists in - the destination. The default behaviour is 'overwrite_or_ignore'. - - 'overwrite_or_ignore' will ignore any existing data and will - overwrite files with the same name as an output file. Other - existing files will be ignored. This behavior, in combination - with a unique basename_template for each write, will allow for - an append workflow. - - 'error' will raise an error if any data exists in the destination. - - 'delete_matching' is useful when you are writing a partitioned - dataset. The first time each partition directory is encountered - the entire directory will be deleted. This allows you to overwrite - old partitions completely. - This option is only supported for use_legacy_dataset=False. - **kwargs : dict, - When use_legacy_dataset=False, used as additional kwargs for - `dataset.write_dataset` function (passed to - `ParquetFileFormat.make_write_options`). See the docstring - of `write_table` for the available options. - When use_legacy_dataset=True, used as additional kwargs for - `parquet.write_table` function (See docstring for `write_table` - or `ParquetWriter` for more information). - Using `metadata_collector` in kwargs allows one to collect the - file metadata instances of dataset pieces. The file paths in the - ColumnChunkMetaData will be set relative to `root_path`. - - Examples - -------- - Generate an example PyArrow Table: - - >>> import pyarrow as pa - >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - - and write it to a partitioned dataset: - - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path='dataset_name_3', - ... partition_cols=['year']) - >>> pq.ParquetDataset('dataset_name_3', use_legacy_dataset=False).files - ['dataset_name_3/year=2019/...-0.parquet', ... - - Write a single Parquet file into the root folder: - - >>> pq.write_to_dataset(table, root_path='dataset_name_4') - >>> pq.ParquetDataset('dataset_name_4/', use_legacy_dataset=False).files - ['dataset_name_4/...-0.parquet'] - """ - # Choose the implementation - if use_legacy_dataset is None: - # if partition_filename_cb is specified -> - # default to the old implementation - if partition_filename_cb: - use_legacy_dataset = True - # otherwise the default is False - else: - use_legacy_dataset = False - - # Check for conflicting keywords - msg_confl_0 = ( - "The '{0}' argument is not supported by use_legacy_dataset={2}. " - "Use only '{1}' instead." - ) - msg_confl_1 = ( - "The '{1}' argument is not supported by use_legacy_dataset={2}. " - "Use only '{0}' instead." 
- ) - msg_confl = msg_confl_0 if use_legacy_dataset else msg_confl_1 - if partition_filename_cb is not None and basename_template is not None: - raise ValueError(msg_confl.format("basename_template", - "partition_filename_cb", - use_legacy_dataset)) - - if partition_cols is not None and partitioning is not None: - raise ValueError(msg_confl.format("partitioning", - "partition_cols", - use_legacy_dataset)) - - metadata_collector = kwargs.pop('metadata_collector', None) - if metadata_collector is not None and file_visitor is not None: - raise ValueError(msg_confl.format("file_visitor", - "metadata_collector", - use_legacy_dataset)) - - # New dataset implementation - if not use_legacy_dataset: - import pyarrow.dataset as ds - - # extract non-file format options - schema = kwargs.pop("schema", None) - use_threads = kwargs.pop("use_threads", True) - chunk_size = kwargs.pop("chunk_size", None) - row_group_size = kwargs.pop("row_group_size", None) - - row_group_size = ( - row_group_size if row_group_size is not None else chunk_size - ) - - # raise for unsupported keywords - msg = ( - "The '{}' argument is not supported with the new dataset " - "implementation." - ) - - if metadata_collector is not None: - def file_visitor(written_file): - metadata_collector.append(written_file.metadata) - if partition_filename_cb is not None: - raise ValueError(msg.format("partition_filename_cb")) - - # map format arguments - parquet_format = ds.ParquetFileFormat() - write_options = parquet_format.make_write_options(**kwargs) - - # map old filesystems to new one - if filesystem is not None: - filesystem = _ensure_filesystem(filesystem) - - if partition_cols: - part_schema = table.select(partition_cols).schema - partitioning = ds.partitioning(part_schema, flavor="hive") - - if basename_template is None: - basename_template = guid() + '-{i}.parquet' - - if existing_data_behavior is None: - existing_data_behavior = 'overwrite_or_ignore' - - ds.write_dataset( - table, root_path, filesystem=filesystem, - format=parquet_format, file_options=write_options, schema=schema, - partitioning=partitioning, use_threads=use_threads, - file_visitor=file_visitor, - basename_template=basename_template, - existing_data_behavior=existing_data_behavior, - max_rows_per_group=row_group_size) - return - - # warnings and errors when using legacy implementation - if use_legacy_dataset: - warnings.warn( - "Passing 'use_legacy_dataset=True' to get the legacy behaviour is " - "deprecated as of pyarrow 8.0.0, and the legacy implementation " - "will be removed in a future version.", - FutureWarning, stacklevel=2) - msg2 = ( - "The '{}' argument is not supported with the legacy " - "implementation. To use this argument specify " - "'use_legacy_dataset=False' while constructing the " - "ParquetDataset." - ) - if schema is not None: - raise ValueError(msg2.format("schema")) - if partitioning is not None: - raise ValueError(msg2.format("partitioning")) - if use_threads is not None: - raise ValueError(msg2.format("use_threads")) - if file_visitor is not None: - raise ValueError(msg2.format("file_visitor")) - if existing_data_behavior is not None: - raise ValueError(msg2.format("existing_data_behavior")) - if basename_template is not None: - raise ValueError(msg2.format("basename_template")) - if partition_filename_cb is not None: - warnings.warn( - _DEPR_MSG.format("partition_filename_cb", " Specify " - "'use_legacy_dataset=False' while constructing " - "the ParquetDataset, and then use the " - "'basename_template' parameter instead. 
For " - "usage see `pyarrow.dataset.write_dataset`"), - FutureWarning, stacklevel=2) - - # Legacy implementation - fs, root_path = legacyfs.resolve_filesystem_and_path(root_path, filesystem) - - _mkdir_if_not_exists(fs, root_path) - - if partition_cols is not None and len(partition_cols) > 0: - df = table.to_pandas() - partition_keys = [df[col] for col in partition_cols] - data_df = df.drop(partition_cols, axis='columns') - data_cols = df.columns.drop(partition_cols) - if len(data_cols) == 0: - raise ValueError('No data left to save outside partition columns') - - subschema = table.schema - - # ARROW-2891: Ensure the output_schema is preserved when writing a - # partitioned dataset - for col in table.schema.names: - if col in partition_cols: - subschema = subschema.remove(subschema.get_field_index(col)) - - for keys, subgroup in data_df.groupby(partition_keys): - if not isinstance(keys, tuple): - keys = (keys,) - subdir = '/'.join( - ['{colname}={value}'.format(colname=name, value=val) - for name, val in zip(partition_cols, keys)]) - subtable = pa.Table.from_pandas(subgroup, schema=subschema, - safe=False) - _mkdir_if_not_exists(fs, '/'.join([root_path, subdir])) - if partition_filename_cb: - outfile = partition_filename_cb(keys) - else: - outfile = guid() + '.parquet' - relative_path = '/'.join([subdir, outfile]) - full_path = '/'.join([root_path, relative_path]) - with fs.open(full_path, 'wb') as f: - write_table(subtable, f, metadata_collector=metadata_collector, - **kwargs) - if metadata_collector is not None: - metadata_collector[-1].set_file_path(relative_path) - else: - if partition_filename_cb: - outfile = partition_filename_cb(None) - else: - outfile = guid() + '.parquet' - full_path = '/'.join([root_path, outfile]) - with fs.open(full_path, 'wb') as f: - write_table(table, f, metadata_collector=metadata_collector, - **kwargs) - if metadata_collector is not None: - metadata_collector[-1].set_file_path(outfile) - - -def write_metadata(schema, where, metadata_collector=None, **kwargs): - """ - Write metadata-only Parquet file from schema. This can be used with - `write_to_dataset` to generate `_common_metadata` and `_metadata` sidecar - files. - - Parameters - ---------- - schema : pyarrow.Schema - where : string or pyarrow.NativeFile - metadata_collector : list - where to collect metadata information. - **kwargs : dict, - Additional kwargs for ParquetWriter class. See docstring for - `ParquetWriter` for more information. - - Examples - -------- - Generate example data: - - >>> import pyarrow as pa - >>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - - Write a dataset and collect metadata information. - - >>> metadata_collector = [] - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset( - ... table, 'dataset_metadata', - ... metadata_collector=metadata_collector) - - Write the `_common_metadata` parquet file without row groups statistics. - - >>> pq.write_metadata( - ... table.schema, 'dataset_metadata/_common_metadata') - - Write the `_metadata` parquet file with row groups statistics. - - >>> pq.write_metadata( - ... table.schema, 'dataset_metadata/_metadata', - ... metadata_collector=metadata_collector) - """ - writer = ParquetWriter(where, schema, **kwargs) - writer.close() - - if metadata_collector is not None: - # ParquetWriter doesn't expose the metadata until it's written. Write - # it and read it again. 
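        # The metadata-only file written above contains just the schema; the
        # statements below read its footer back, append the row-group metadata
        # gathered in `metadata_collector` (one FileMetaData per written
        # piece), and rewrite `where` with the merged footer, which is how a
        # `_metadata` sidecar summarising a whole dataset is produced.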
- metadata = read_metadata(where) - for m in metadata_collector: - metadata.append_row_groups(m) - metadata.write_metadata_file(where) - - -def read_metadata(where, memory_map=False, decryption_properties=None): - """ - Read FileMetaData from footer of a single Parquet file. - - Parameters - ---------- - where : str (file path) or file-like object - memory_map : bool, default False - Create memory map when the source is a file path. - decryption_properties : FileDecryptionProperties, default None - Decryption properties for reading encrypted Parquet files. - - Returns - ------- - metadata : FileMetaData - - Examples - -------- - >>> import pyarrow as pa - >>> import pyarrow.parquet as pq - >>> table = pa.table({'n_legs': [4, 5, 100], - ... 'animal': ["Dog", "Brittle stars", "Centipede"]}) - >>> pq.write_table(table, 'example.parquet') - - >>> pq.read_metadata('example.parquet') - - created_by: parquet-cpp-arrow version ... - num_columns: 2 - num_rows: 3 - num_row_groups: 1 - format_version: 2.6 - serialized_size: 561 - """ - return ParquetFile(where, memory_map=memory_map, - decryption_properties=decryption_properties).metadata - - -def read_schema(where, memory_map=False, decryption_properties=None): - """ - Read effective Arrow schema from Parquet file metadata. - - Parameters - ---------- - where : str (file path) or file-like object - memory_map : bool, default False - Create memory map when the source is a file path. - decryption_properties : FileDecryptionProperties, default None - Decryption properties for reading encrypted Parquet files. - - Returns - ------- - schema : pyarrow.Schema - - Examples - -------- - >>> import pyarrow as pa - >>> import pyarrow.parquet as pq - >>> table = pa.table({'n_legs': [4, 5, 100], - ... 'animal': ["Dog", "Brittle stars", "Centipede"]}) - >>> pq.write_table(table, 'example.parquet') - - >>> pq.read_schema('example.parquet') - n_legs: int64 - animal: string - """ - return ParquetFile( - where, memory_map=memory_map, - decryption_properties=decryption_properties).schema.to_arrow_schema() +from .core import * diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py new file mode 100644 index 0000000000000..fd51829f50630 --- /dev/null +++ b/python/pyarrow/parquet/core.py @@ -0,0 +1,3508 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
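With this refactor the parquet implementation moves to `python/pyarrow/parquet/core.py`, and the old `__init__.py` body deleted above is replaced by the single `from .core import *` re-export. A minimal illustrative sketch of what that implies for user code (not part of the diff; it assumes the star import re-exports the public names, as the new `__init__.py` suggests):

import pyarrow.parquet as pq
import pyarrow.parquet.core as pq_core

# The package namespace is populated from core.py, so both spellings are
# expected to resolve to the same function object (assumption based on the
# `from .core import *` line in the new __init__.py).
print(pq.read_table is pq_core.read_table)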
+ + +from collections import defaultdict +from concurrent import futures +from contextlib import nullcontext +from functools import partial, reduce + +import sys +import json +from collections.abc import Collection +import numpy as np +import os +import re +import operator +import urllib.parse +import warnings + +import pyarrow as pa +import pyarrow.lib as lib +import pyarrow._parquet as _parquet + +from pyarrow._parquet import (ParquetReader, Statistics, # noqa + FileMetaData, RowGroupMetaData, + ColumnChunkMetaData, + ParquetSchema, ColumnSchema, + ParquetLogicalType, + FileEncryptionProperties, + FileDecryptionProperties) +from pyarrow.fs import (LocalFileSystem, FileSystem, + _resolve_filesystem_and_path, _ensure_filesystem) +from pyarrow import filesystem as legacyfs +from pyarrow.util import guid, _is_path_like, _stringify_path + +_URI_STRIP_SCHEMES = ('hdfs',) + + +def _parse_uri(path): + path = _stringify_path(path) + parsed_uri = urllib.parse.urlparse(path) + if parsed_uri.scheme in _URI_STRIP_SCHEMES: + return parsed_uri.path + else: + # ARROW-4073: On Windows returning the path with the scheme + # stripped removes the drive letter, if any + return path + + +def _get_filesystem_and_path(passed_filesystem, path): + if passed_filesystem is None: + return legacyfs.resolve_filesystem_and_path(path, passed_filesystem) + else: + passed_filesystem = legacyfs._ensure_filesystem(passed_filesystem) + parsed_path = _parse_uri(path) + return passed_filesystem, parsed_path + + +def _check_contains_null(val): + if isinstance(val, bytes): + for byte in val: + if isinstance(byte, bytes): + compare_to = chr(0) + else: + compare_to = 0 + if byte == compare_to: + return True + elif isinstance(val, str): + return '\x00' in val + return False + + +def _check_filters(filters, check_null_strings=True): + """ + Check if filters are well-formed. + """ + if filters is not None: + if len(filters) == 0 or any(len(f) == 0 for f in filters): + raise ValueError("Malformed filters") + if isinstance(filters[0][0], str): + # We have encountered the situation where we have one nesting level + # too few: + # We have [(,,), ..] instead of [[(,,), ..]] + filters = [filters] + if check_null_strings: + for conjunction in filters: + for col, op, val in conjunction: + if ( + isinstance(val, list) and + all(_check_contains_null(v) for v in val) or + _check_contains_null(val) + ): + raise NotImplementedError( + "Null-terminated binary strings are not supported " + "as filter values." + ) + return filters + + +_DNF_filter_doc = """Predicates are expressed in disjunctive normal form (DNF), + like ``[[('x', '=', 0), ...], ...]``. DNF allows arbitrary boolean logical + combinations of single column predicates. The innermost tuples each + describe a single column predicate. The list of inner predicates is + interpreted as a conjunction (AND), forming a more selective and multiple + column predicate. Finally, the most outer list combines these filters as a + disjunction (OR). + + Predicates may also be passed as List[Tuple]. This form is interpreted + as a single conjunction. To express OR in predicates, one must + use the (preferred) List[List[Tuple]] notation. + + Each tuple has format: (``key``, ``op``, ``value``) and compares the + ``key`` with the ``value``. + The supported ``op`` are: ``=`` or ``==``, ``!=``, ``<``, ``>``, ``<=``, + ``>=``, ``in`` and ``not in``. If the ``op`` is ``in`` or ``not in``, the + ``value`` must be a collection such as a ``list``, a ``set`` or a + ``tuple``. + + Examples: + + .. 
code-block:: python + + ('x', '=', 0) + ('y', 'in', ['a', 'b', 'c']) + ('z', 'not in', {'a','b'}) + + """ + + +def _filters_to_expression(filters): + """ + Check if filters are well-formed. + + See _DNF_filter_doc above for more details. + """ + import pyarrow.dataset as ds + + if isinstance(filters, ds.Expression): + return filters + + filters = _check_filters(filters, check_null_strings=False) + + def convert_single_predicate(col, op, val): + field = ds.field(col) + + if op == "=" or op == "==": + return field == val + elif op == "!=": + return field != val + elif op == '<': + return field < val + elif op == '>': + return field > val + elif op == '<=': + return field <= val + elif op == '>=': + return field >= val + elif op == 'in': + return field.isin(val) + elif op == 'not in': + return ~field.isin(val) + else: + raise ValueError( + '"{0}" is not a valid operator in predicates.'.format( + (col, op, val))) + + disjunction_members = [] + + for conjunction in filters: + conjunction_members = [ + convert_single_predicate(col, op, val) + for col, op, val in conjunction + ] + + disjunction_members.append(reduce(operator.and_, conjunction_members)) + + return reduce(operator.or_, disjunction_members) + + +# ---------------------------------------------------------------------- +# Reading a single Parquet file + + +class ParquetFile: + """ + Reader interface for a single Parquet file. + + Parameters + ---------- + source : str, pathlib.Path, pyarrow.NativeFile, or file-like object + Readable source. For passing bytes or buffer-like file containing a + Parquet file, use pyarrow.BufferReader. + metadata : FileMetaData, default None + Use existing metadata object, rather than reading from file. + common_metadata : FileMetaData, default None + Will be used in reads for pandas schema metadata if not found in the + main file's metadata, no other uses at the moment. + memory_map : bool, default False + If the source is a file path, use a memory map to read file, which can + improve performance in some environments. + buffer_size : int, default 0 + If positive, perform read buffering when deserializing individual + column chunks. Otherwise IO calls are unbuffered. + pre_buffer : bool, default False + Coalesce and issue file reads in parallel to improve performance on + high-latency filesystems (e.g. S3). If True, Arrow will use a + background I/O thread pool. + read_dictionary : list + List of column names to read directly as DictionaryArray. + coerce_int96_timestamp_unit : str, default None. + Cast timestamps that are stored in INT96 format to a particular + resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' + and therefore INT96 timestamps will be inferred as timestamps + in nanoseconds. + decryption_properties : FileDecryptionProperties, default None + File decryption properties for Parquet Modular Encryption. + thrift_string_size_limit : int, default None + If not None, override the maximum total string size allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + thrift_container_size_limit : int, default None + If not None, override the maximum total size of containers allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + + Examples + -------- + + Generate an example PyArrow Table and write it to Parquet file: + + >>> import pyarrow as pa + >>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], + ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", + ... 
"Brittle stars", "Centipede"]}) + + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, 'example.parquet') + + Create a ``ParquetFile`` object from the Parquet file: + + >>> parquet_file = pq.ParquetFile('example.parquet') + + Read the data: + + >>> parquet_file.read() + pyarrow.Table + n_legs: int64 + animal: string + ---- + n_legs: [[2,2,4,4,5,100]] + animal: [["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]] + + Create a ParquetFile object with "animal" column as DictionaryArray: + + >>> parquet_file = pq.ParquetFile('example.parquet', + ... read_dictionary=["animal"]) + >>> parquet_file.read() + pyarrow.Table + n_legs: int64 + animal: dictionary + ---- + n_legs: [[2,2,4,4,5,100]] + animal: [ -- dictionary: + ["Flamingo","Parrot",...,"Brittle stars","Centipede"] -- indices: + [0,1,2,3,4,5]] + """ + + def __init__(self, source, *, metadata=None, common_metadata=None, + read_dictionary=None, memory_map=False, buffer_size=0, + pre_buffer=False, coerce_int96_timestamp_unit=None, + decryption_properties=None, thrift_string_size_limit=None, + thrift_container_size_limit=None): + self.reader = ParquetReader() + self.reader.open( + source, use_memory_map=memory_map, + buffer_size=buffer_size, pre_buffer=pre_buffer, + read_dictionary=read_dictionary, metadata=metadata, + coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, + decryption_properties=decryption_properties, + thrift_string_size_limit=thrift_string_size_limit, + thrift_container_size_limit=thrift_container_size_limit, + ) + self._close_source = getattr(source, 'closed', True) + self.common_metadata = common_metadata + self._nested_paths_by_prefix = self._build_nested_paths() + + def __enter__(self): + return self + + def __exit__(self, *args, **kwargs): + self.close() + + def _build_nested_paths(self): + paths = self.reader.column_paths + + result = defaultdict(list) + + for i, path in enumerate(paths): + key = path[0] + rest = path[1:] + while True: + result[key].append(i) + + if not rest: + break + + key = '.'.join((key, rest[0])) + rest = rest[1:] + + return result + + @property + def metadata(self): + """ + Return the Parquet metadata. + """ + return self.reader.metadata + + @property + def schema(self): + """ + Return the Parquet schema, unconverted to Arrow types + """ + return self.metadata.schema + + @property + def schema_arrow(self): + """ + Return the inferred Arrow schema, converted from the whole Parquet + file's schema + + Examples + -------- + Generate an example Parquet file: + + >>> import pyarrow as pa + >>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], + ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", + ... "Brittle stars", "Centipede"]}) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, 'example.parquet') + >>> parquet_file = pq.ParquetFile('example.parquet') + + Read the Arrow schema: + + >>> parquet_file.schema_arrow + n_legs: int64 + animal: string + """ + return self.reader.schema_arrow + + @property + def num_row_groups(self): + """ + Return the number of row groups of the Parquet file. + + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], + ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", + ... 
"Brittle stars", "Centipede"]}) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, 'example.parquet') + >>> parquet_file = pq.ParquetFile('example.parquet') + + >>> parquet_file.num_row_groups + 1 + """ + return self.reader.num_row_groups + + def close(self, force: bool = False): + if self._close_source or force: + self.reader.close() + + @property + def closed(self) -> bool: + return self.reader.closed + + def read_row_group(self, i, columns=None, use_threads=True, + use_pandas_metadata=False): + """ + Read a single row group from a Parquet file. + + Parameters + ---------- + i : int + Index of the individual row group that we want to read. + columns : list + If not None, only these columns will be read from the row group. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e'. + use_threads : bool, default True + Perform multi-threaded column reads. + use_pandas_metadata : bool, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + + Returns + ------- + pyarrow.table.Table + Content of the row group as a table (of columns) + + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], + ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", + ... "Brittle stars", "Centipede"]}) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, 'example.parquet') + >>> parquet_file = pq.ParquetFile('example.parquet') + + >>> parquet_file.read_row_group(0) + pyarrow.Table + n_legs: int64 + animal: string + ---- + n_legs: [[2,2,4,4,5,100]] + animal: [["Flamingo","Parrot",...,"Brittle stars","Centipede"]] + """ + column_indices = self._get_column_indices( + columns, use_pandas_metadata=use_pandas_metadata) + return self.reader.read_row_group(i, column_indices=column_indices, + use_threads=use_threads) + + def read_row_groups(self, row_groups, columns=None, use_threads=True, + use_pandas_metadata=False): + """ + Read a multiple row groups from a Parquet file. + + Parameters + ---------- + row_groups : list + Only these row groups will be read from the file. + columns : list + If not None, only these columns will be read from the row group. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e'. + use_threads : bool, default True + Perform multi-threaded column reads. + use_pandas_metadata : bool, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + + Returns + ------- + pyarrow.table.Table + Content of the row groups as a table (of columns). + + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], + ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", + ... 
"Brittle stars", "Centipede"]}) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, 'example.parquet') + >>> parquet_file = pq.ParquetFile('example.parquet') + + >>> parquet_file.read_row_groups([0,0]) + pyarrow.Table + n_legs: int64 + animal: string + ---- + n_legs: [[2,2,4,4,5,...,2,4,4,5,100]] + animal: [["Flamingo","Parrot","Dog",...,"Brittle stars","Centipede"]] + """ + column_indices = self._get_column_indices( + columns, use_pandas_metadata=use_pandas_metadata) + return self.reader.read_row_groups(row_groups, + column_indices=column_indices, + use_threads=use_threads) + + def iter_batches(self, batch_size=65536, row_groups=None, columns=None, + use_threads=True, use_pandas_metadata=False): + """ + Read streaming batches from a Parquet file. + + Parameters + ---------- + batch_size : int, default 64K + Maximum number of records to yield per batch. Batches may be + smaller if there aren't enough rows in the file. + row_groups : list + Only these row groups will be read from the file. + columns : list + If not None, only these columns will be read from the file. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e'. + use_threads : boolean, default True + Perform multi-threaded column reads. + use_pandas_metadata : boolean, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + + Returns + ------- + iterator of pyarrow.RecordBatch + Contents of each batch as a record batch + + Examples + -------- + Generate an example Parquet file: + + >>> import pyarrow as pa + >>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], + ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", + ... "Brittle stars", "Centipede"]}) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, 'example.parquet') + >>> parquet_file = pq.ParquetFile('example.parquet') + >>> for i in parquet_file.iter_batches(): + ... print("RecordBatch") + ... print(i.to_pandas()) + ... + RecordBatch + n_legs animal + 0 2 Flamingo + 1 2 Parrot + 2 4 Dog + 3 4 Horse + 4 5 Brittle stars + 5 100 Centipede + """ + if row_groups is None: + row_groups = range(0, self.metadata.num_row_groups) + column_indices = self._get_column_indices( + columns, use_pandas_metadata=use_pandas_metadata) + + batches = self.reader.iter_batches(batch_size, + row_groups=row_groups, + column_indices=column_indices, + use_threads=use_threads) + return batches + + def read(self, columns=None, use_threads=True, use_pandas_metadata=False): + """ + Read a Table from Parquet format. + + Parameters + ---------- + columns : list + If not None, only these columns will be read from the file. A + column name may be a prefix of a nested field, e.g. 'a' will select + 'a.b', 'a.c', and 'a.d.e'. + use_threads : bool, default True + Perform multi-threaded column reads. + use_pandas_metadata : bool, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + + Returns + ------- + pyarrow.table.Table + Content of the file as a table (of columns). + + Examples + -------- + Generate an example Parquet file: + + >>> import pyarrow as pa + >>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], + ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", + ... 
"Brittle stars", "Centipede"]}) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, 'example.parquet') + >>> parquet_file = pq.ParquetFile('example.parquet') + + Read a Table: + + >>> parquet_file.read(columns=["animal"]) + pyarrow.Table + animal: string + ---- + animal: [["Flamingo","Parrot",...,"Brittle stars","Centipede"]] + """ + column_indices = self._get_column_indices( + columns, use_pandas_metadata=use_pandas_metadata) + return self.reader.read_all(column_indices=column_indices, + use_threads=use_threads) + + def scan_contents(self, columns=None, batch_size=65536): + """ + Read contents of file for the given columns and batch size. + + Notes + ----- + This function's primary purpose is benchmarking. + The scan is executed on a single thread. + + Parameters + ---------- + columns : list of integers, default None + Select columns to read, if None scan all columns. + batch_size : int, default 64K + Number of rows to read at a time internally. + + Returns + ------- + num_rows : number of rows in file + + Examples + -------- + >>> import pyarrow as pa + >>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], + ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", + ... "Brittle stars", "Centipede"]}) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, 'example.parquet') + >>> parquet_file = pq.ParquetFile('example.parquet') + + >>> parquet_file.scan_contents() + 6 + """ + column_indices = self._get_column_indices(columns) + return self.reader.scan_contents(column_indices, + batch_size=batch_size) + + def _get_column_indices(self, column_names, use_pandas_metadata=False): + if column_names is None: + return None + + indices = [] + + for name in column_names: + if name in self._nested_paths_by_prefix: + indices.extend(self._nested_paths_by_prefix[name]) + + if use_pandas_metadata: + file_keyvalues = self.metadata.metadata + common_keyvalues = (self.common_metadata.metadata + if self.common_metadata is not None + else None) + + if file_keyvalues and b'pandas' in file_keyvalues: + index_columns = _get_pandas_index_columns(file_keyvalues) + elif common_keyvalues and b'pandas' in common_keyvalues: + index_columns = _get_pandas_index_columns(common_keyvalues) + else: + index_columns = [] + + if indices is not None and index_columns: + indices += [self.reader.column_name_idx(descr) + for descr in index_columns + if not isinstance(descr, dict)] + + return indices + + +_SPARK_DISALLOWED_CHARS = re.compile('[ ,;{}()\n\t=]') + + +def _sanitized_spark_field_name(name): + return _SPARK_DISALLOWED_CHARS.sub('_', name) + + +def _sanitize_schema(schema, flavor): + if 'spark' in flavor: + sanitized_fields = [] + + schema_changed = False + + for field in schema: + name = field.name + sanitized_name = _sanitized_spark_field_name(name) + + if sanitized_name != name: + schema_changed = True + sanitized_field = pa.field(sanitized_name, field.type, + field.nullable, field.metadata) + sanitized_fields.append(sanitized_field) + else: + sanitized_fields.append(field) + + new_schema = pa.schema(sanitized_fields, metadata=schema.metadata) + return new_schema, schema_changed + else: + return schema, False + + +def _sanitize_table(table, new_schema, flavor): + # TODO: This will not handle prohibited characters in nested field names + if 'spark' in flavor: + column_data = [table[i] for i in range(table.num_columns)] + return pa.Table.from_arrays(column_data, schema=new_schema) + else: + return table + + +_parquet_writer_arg_docs = """version : {"1.0", "2.4", "2.6"}, default "2.4" + Determine which 
Parquet logical types are available for use, whether the + reduced set from the Parquet 1.x.x format or the expanded logical types + added in later format versions. + Files written with version='2.4' or '2.6' may not be readable in all + Parquet implementations, so version='1.0' is likely the choice that + maximizes file compatibility. + UINT32 and some logical types are only available with version '2.4'. + Nanosecond timestamps are only available with version '2.6'. + Other features such as compression algorithms or the new serialized + data page format must be enabled separately (see 'compression' and + 'data_page_version'). +use_dictionary : bool or list + Specify if we should use dictionary encoding in general or only for + some columns. +use_deprecated_int96_timestamps : bool, default None + Write timestamps to INT96 Parquet format. Defaults to False unless enabled + by flavor argument. This take priority over the coerce_timestamps option. +coerce_timestamps : str, default None + Cast timestamps to a particular resolution. If omitted, defaults are chosen + depending on `version`. By default, for ``version='1.0'`` (the default) + and ``version='2.4'``, nanoseconds are cast to microseconds ('us'), while + for other `version` values, they are written natively without loss + of resolution. Seconds are always cast to milliseconds ('ms') by default, + as Parquet does not have any temporal type with seconds resolution. + If the casting results in loss of data, it will raise an exception + unless ``allow_truncated_timestamps=True`` is given. + Valid values: {None, 'ms', 'us'} +data_page_size : int, default None + Set a target threshold for the approximate encoded size of data + pages within a column chunk (in bytes). If None, use the default data page + size of 1MByte. +allow_truncated_timestamps : bool, default False + Allow loss of data when coercing timestamps to a particular + resolution. E.g. if microsecond or nanosecond data is lost when coercing to + 'ms', do not raise an exception. Passing ``allow_truncated_timestamp=True`` + will NOT result in the truncation exception being ignored unless + ``coerce_timestamps`` is not None. +compression : str or dict + Specify the compression codec, either on a general basis or per-column. + Valid values: {'NONE', 'SNAPPY', 'GZIP', 'BROTLI', 'LZ4', 'ZSTD'}. +write_statistics : bool or list + Specify if we should write statistics in general (default is True) or only + for some columns. +flavor : {'spark'}, default None + Sanitize schema or set other compatibility options to work with + various target systems. +filesystem : FileSystem, default None + If nothing passed, will be inferred from `where` if path-like, else + `where` is already a file-like object so no filesystem is needed. +compression_level : int or dict, default None + Specify the compression level for a codec, either on a general basis or + per-column. If None is passed, arrow selects the compression level for + the compression codec in use. The compression level has a different + meaning for each codec, so you have to read the documentation of the + codec you are using. + An exception is thrown if the compression codec does not allow specifying + a compression level. +use_byte_stream_split : bool or list, default False + Specify if the byte_stream_split encoding should be used in general or + only for some columns. If both dictionary and byte_stream_stream are + enabled, then dictionary is preferred. 
+ The byte_stream_split encoding is valid only for floating-point data types + and should be combined with a compression codec. +column_encoding : string or dict, default None + Specify the encoding scheme on a per column basis. + Currently supported values: {'PLAIN', 'BYTE_STREAM_SPLIT'}. + Certain encodings are only compatible with certain data types. + Please refer to the encodings section of `Reading and writing Parquet + files `_. +data_page_version : {"1.0", "2.0"}, default "1.0" + The serialized Parquet data page format version to write, defaults to + 1.0. This does not impact the file schema logical types and Arrow to + Parquet type casting behavior; for that use the "version" option. +use_compliant_nested_type : bool, default False + Whether to write compliant Parquet nested type (lists) as defined + `here `_, defaults to ``False``. + For ``use_compliant_nested_type=True``, this will write into a list + with 3-level structure where the middle level, named ``list``, + is a repeated group with a single field named ``element``:: + + group (LIST) { + repeated group list { + element; + } + } + + For ``use_compliant_nested_type=False``, this will also write into a list + with 3-level structure, where the name of the single field of the middle + level ``list`` is taken from the element name for nested columns in Arrow, + which defaults to ``item``:: + + group (LIST) { + repeated group list { + item; + } + } +encryption_properties : FileEncryptionProperties, default None + File encryption properties for Parquet Modular Encryption. + If None, no encryption will be done. + The encryption properties can be created using: + ``CryptoFactory.file_encryption_properties()``. +write_batch_size : int, default None + Number of values to write to a page at a time. If None, use the default of + 1024. ``write_batch_size`` is complementary to ``data_page_size``. If pages + are exceeding the ``data_page_size`` due to large column values, lowering + the batch size can help keep page sizes closer to the intended size. +dictionary_pagesize_limit : int, default None + Specify the dictionary page size limit per row group. If None, use the + default 1MB. +""" + +_parquet_writer_example_doc = """\ +Generate an example PyArrow Table and RecordBatch: + +>>> import pyarrow as pa +>>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], +... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", +... "Brittle stars", "Centipede"]}) +>>> batch = pa.record_batch([[2, 2, 4, 4, 5, 100], +... ["Flamingo", "Parrot", "Dog", "Horse", +... "Brittle stars", "Centipede"]], +... names=['n_legs', 'animal']) + +create a ParquetWriter object: + +>>> import pyarrow.parquet as pq +>>> writer = pq.ParquetWriter('example.parquet', table.schema) + +and write the Table into the Parquet file: + +>>> writer.write_table(table) +>>> writer.close() + +>>> pq.read_table('example.parquet').to_pandas() + n_legs animal +0 2 Flamingo +1 2 Parrot +2 4 Dog +3 4 Horse +4 5 Brittle stars +5 100 Centipede + +create a ParquetWriter object for the RecordBatch: + +>>> writer2 = pq.ParquetWriter('example2.parquet', batch.schema) + +and write the RecordBatch into the Parquet file: + +>>> writer2.write_batch(batch) +>>> writer2.close() + +>>> pq.read_table('example2.parquet').to_pandas() + n_legs animal +0 2 Flamingo +1 2 Parrot +2 4 Dog +3 4 Horse +4 5 Brittle stars +5 100 Centipede +""" + + +class ParquetWriter: + + __doc__ = """ +Class for incrementally building a Parquet file for Arrow tables. 
+ +Parameters +---------- +where : path or file-like object +schema : pyarrow.Schema +{} +writer_engine_version : unused +**options : dict + If options contains a key `metadata_collector` then the + corresponding value is assumed to be a list (or any object with + `.append` method) that will be filled with the file metadata instance + of the written file. + +Examples +-------- +{} +""".format(_parquet_writer_arg_docs, _parquet_writer_example_doc) + + def __init__(self, where, schema, filesystem=None, + flavor=None, + version='2.4', + use_dictionary=True, + compression='snappy', + write_statistics=True, + use_deprecated_int96_timestamps=None, + compression_level=None, + use_byte_stream_split=False, + column_encoding=None, + writer_engine_version=None, + data_page_version='1.0', + use_compliant_nested_type=False, + encryption_properties=None, + write_batch_size=None, + dictionary_pagesize_limit=None, + **options): + if use_deprecated_int96_timestamps is None: + # Use int96 timestamps for Spark + if flavor is not None and 'spark' in flavor: + use_deprecated_int96_timestamps = True + else: + use_deprecated_int96_timestamps = False + + self.flavor = flavor + if flavor is not None: + schema, self.schema_changed = _sanitize_schema(schema, flavor) + else: + self.schema_changed = False + + self.schema = schema + self.where = where + + # If we open a file using a filesystem, store file handle so we can be + # sure to close it when `self.close` is called. + self.file_handle = None + + filesystem, path = _resolve_filesystem_and_path( + where, filesystem, allow_legacy_filesystem=True + ) + if filesystem is not None: + if isinstance(filesystem, legacyfs.FileSystem): + # legacy filesystem (eg custom subclass) + # TODO deprecate + sink = self.file_handle = filesystem.open(path, 'wb') + else: + # ARROW-10480: do not auto-detect compression. While + # a filename like foo.parquet.gz is nonconforming, it + # shouldn't implicitly apply compression. + sink = self.file_handle = filesystem.open_output_stream( + path, compression=None) + else: + sink = where + self._metadata_collector = options.pop('metadata_collector', None) + engine_version = 'V2' + self.writer = _parquet.ParquetWriter( + sink, schema, + version=version, + compression=compression, + use_dictionary=use_dictionary, + write_statistics=write_statistics, + use_deprecated_int96_timestamps=use_deprecated_int96_timestamps, + compression_level=compression_level, + use_byte_stream_split=use_byte_stream_split, + column_encoding=column_encoding, + writer_engine_version=engine_version, + data_page_version=data_page_version, + use_compliant_nested_type=use_compliant_nested_type, + encryption_properties=encryption_properties, + write_batch_size=write_batch_size, + dictionary_pagesize_limit=dictionary_pagesize_limit, + **options) + self.is_open = True + + def __del__(self): + if getattr(self, 'is_open', False): + self.close() + + def __enter__(self): + return self + + def __exit__(self, *args, **kwargs): + self.close() + # return false since we want to propagate exceptions + return False + + def write(self, table_or_batch, row_group_size=None): + """ + Write RecordBatch or Table to the Parquet file. + + Parameters + ---------- + table_or_batch : {RecordBatch, Table} + row_group_size : int, default None + Maximum size of each written row group. If None, the + row group size will be the minimum of the input + table or batch length and 64 * 1024 * 1024. 
+ """ + if isinstance(table_or_batch, pa.RecordBatch): + self.write_batch(table_or_batch, row_group_size) + elif isinstance(table_or_batch, pa.Table): + self.write_table(table_or_batch, row_group_size) + else: + raise TypeError(type(table_or_batch)) + + def write_batch(self, batch, row_group_size=None): + """ + Write RecordBatch to the Parquet file. + + Parameters + ---------- + batch : RecordBatch + row_group_size : int, default None + Maximum size of each written row group. If None, the + row group size will be the minimum of the RecordBatch + size and 64 * 1024 * 1024. + """ + table = pa.Table.from_batches([batch], batch.schema) + self.write_table(table, row_group_size) + + def write_table(self, table, row_group_size=None): + """ + Write Table to the Parquet file. + + Parameters + ---------- + table : Table + row_group_size : int, default None + Maximum size of each written row group. If None, the + row group size will be the minimum of the Table size + and 64 * 1024 * 1024. + + """ + if self.schema_changed: + table = _sanitize_table(table, self.schema, self.flavor) + assert self.is_open + + if not table.schema.equals(self.schema, check_metadata=False): + msg = ('Table schema does not match schema used to create file: ' + '\ntable:\n{!s} vs. \nfile:\n{!s}' + .format(table.schema, self.schema)) + raise ValueError(msg) + + self.writer.write_table(table, row_group_size=row_group_size) + + def close(self): + """ + Close the connection to the Parquet file. + """ + if self.is_open: + self.writer.close() + self.is_open = False + if self._metadata_collector is not None: + self._metadata_collector.append(self.writer.metadata) + if self.file_handle is not None: + self.file_handle.close() + + +def _get_pandas_index_columns(keyvalues): + return (json.loads(keyvalues[b'pandas'].decode('utf8')) + ['index_columns']) + + +# ---------------------------------------------------------------------- +# Metadata container providing instructions about reading a single Parquet +# file, possibly part of a partitioned dataset + + +class ParquetDatasetPiece: + """ + DEPRECATED: A single chunk of a potentially larger Parquet dataset to read. + + The arguments will indicate to read either a single row group or all row + groups, and whether to add partition keys to the resulting pyarrow.Table. + + .. deprecated:: 5.0 + Directly constructing a ``ParquetDatasetPiece`` is deprecated, as well + as accessing the pieces of a ``ParquetDataset`` object. Specify + ``use_legacy_dataset=False`` when constructing the ``ParquetDataset`` + and use the ``ParquetDataset.fragments`` attribute instead. + + Parameters + ---------- + path : str or pathlib.Path + Path to file in the file system where this piece is located. + open_file_func : callable + Function to use for obtaining file handle to dataset piece. + partition_keys : list of tuples + Two-element tuples of ``(column name, ordinal index)``. + row_group : int, default None + Row group to load. By default, reads all row groups. 
+ file_options : dict + Options + """ + + def __init__(self, path, open_file_func=partial(open, mode='rb'), + file_options=None, row_group=None, partition_keys=None): + warnings.warn( + "ParquetDatasetPiece is deprecated as of pyarrow 5.0.0 and will " + "be removed in a future version.", + FutureWarning, stacklevel=2) + self._init( + path, open_file_func, file_options, row_group, partition_keys) + + @staticmethod + def _create(path, open_file_func=partial(open, mode='rb'), + file_options=None, row_group=None, partition_keys=None): + self = ParquetDatasetPiece.__new__(ParquetDatasetPiece) + self._init( + path, open_file_func, file_options, row_group, partition_keys) + return self + + def _init(self, path, open_file_func, file_options, row_group, + partition_keys): + self.path = _stringify_path(path) + self.open_file_func = open_file_func + self.row_group = row_group + self.partition_keys = partition_keys or [] + self.file_options = file_options or {} + + def __eq__(self, other): + if not isinstance(other, ParquetDatasetPiece): + return False + return (self.path == other.path and + self.row_group == other.row_group and + self.partition_keys == other.partition_keys) + + def __repr__(self): + return ('{}({!r}, row_group={!r}, partition_keys={!r})' + .format(type(self).__name__, self.path, + self.row_group, + self.partition_keys)) + + def __str__(self): + result = '' + + if len(self.partition_keys) > 0: + partition_str = ', '.join('{}={}'.format(name, index) + for name, index in self.partition_keys) + result += 'partition[{}] '.format(partition_str) + + result += self.path + + if self.row_group is not None: + result += ' | row_group={}'.format(self.row_group) + + return result + + def get_metadata(self): + """ + Return the file's metadata. + + Returns + ------- + metadata : FileMetaData + """ + with self.open() as parquet: + return parquet.metadata + + def open(self): + """ + Return instance of ParquetFile. + """ + reader = self.open_file_func(self.path) + if not isinstance(reader, ParquetFile): + reader = ParquetFile(reader, **self.file_options) + return reader + + def read(self, columns=None, use_threads=True, partitions=None, + file=None, use_pandas_metadata=False): + """ + Read this piece as a pyarrow.Table. + + Parameters + ---------- + columns : list of column names, default None + use_threads : bool, default True + Perform multi-threaded column reads. + partitions : ParquetPartitions, default None + file : file-like object + Passed to ParquetFile. + use_pandas_metadata : bool + If pandas metadata should be used or not. + + Returns + ------- + table : pyarrow.Table + """ + if self.open_file_func is not None: + reader = self.open() + elif file is not None: + reader = ParquetFile(file, **self.file_options) + else: + # try to read the local path + reader = ParquetFile(self.path, **self.file_options) + + options = dict(columns=columns, + use_threads=use_threads, + use_pandas_metadata=use_pandas_metadata) + + if self.row_group is not None: + table = reader.read_row_group(self.row_group, **options) + else: + table = reader.read(**options) + + if len(self.partition_keys) > 0: + if partitions is None: + raise ValueError('Must pass partition sets') + + # Here, the index is the categorical code of the partition where + # this piece is located. Suppose we had + # + # /foo=a/0.parq + # /foo=b/0.parq + # /foo=c/0.parq + # + # Then we assign a=0, b=1, c=2. And the resulting Table pieces will + # have a DictionaryArray column named foo having the constant index + # value as indicated. 
The distinct categories of the partition have + # been computed in the ParquetManifest + for i, (name, index) in enumerate(self.partition_keys): + # The partition code is the same for all values in this piece + indices = np.full(len(table), index, dtype='i4') + + # This is set of all partition values, computed as part of the + # manifest, so ['a', 'b', 'c'] as in our example above. + dictionary = partitions.levels[i].dictionary + + arr = pa.DictionaryArray.from_arrays(indices, dictionary) + table = table.append_column(name, arr) + + # To ParquetFile the source looked like it was already open, so won't + # actually close it without overriding. + reader.close(force=True) + return table + + +class PartitionSet: + """ + A data structure for cataloguing the observed Parquet partitions at a + particular level. So if we have + + /foo=a/bar=0 + /foo=a/bar=1 + /foo=a/bar=2 + /foo=b/bar=0 + /foo=b/bar=1 + /foo=b/bar=2 + + Then we have two partition sets, one for foo, another for bar. As we visit + levels of the partition hierarchy, a PartitionSet tracks the distinct + values and assigns categorical codes to use when reading the pieces + + Parameters + ---------- + name : str + Name of the partition set. Under which key to collect all values. + keys : list + All possible values that have been collected for that partition set. + """ + + def __init__(self, name, keys=None): + self.name = name + self.keys = keys or [] + self.key_indices = {k: i for i, k in enumerate(self.keys)} + self._dictionary = None + + def get_index(self, key): + """ + Get the index of the partition value if it is known, otherwise assign + one + + Parameters + ---------- + key : The value for which we want to known the index. + """ + if key in self.key_indices: + return self.key_indices[key] + else: + index = len(self.key_indices) + self.keys.append(key) + self.key_indices[key] = index + return index + + @property + def dictionary(self): + if self._dictionary is not None: + return self._dictionary + + if len(self.keys) == 0: + raise ValueError('No known partition keys') + + # Only integer and string partition types are supported right now + try: + integer_keys = [int(x) for x in self.keys] + dictionary = lib.array(integer_keys) + except ValueError: + dictionary = lib.array(self.keys) + + self._dictionary = dictionary + return dictionary + + @property + def is_sorted(self): + return list(self.keys) == sorted(self.keys) + + +class ParquetPartitions: + + def __init__(self): + self.levels = [] + self.partition_names = set() + + def __len__(self): + return len(self.levels) + + def __getitem__(self, i): + return self.levels[i] + + def equals(self, other): + if not isinstance(other, ParquetPartitions): + raise TypeError('`other` must be an instance of ParquetPartitions') + + return (self.levels == other.levels and + self.partition_names == other.partition_names) + + def __eq__(self, other): + try: + return self.equals(other) + except TypeError: + return NotImplemented + + def get_index(self, level, name, key): + """ + Record a partition value at a particular level, returning the distinct + code for that value at that level. 
+ + Examples + -------- + + partitions.get_index(1, 'foo', 'a') returns 0 + partitions.get_index(1, 'foo', 'b') returns 1 + partitions.get_index(1, 'foo', 'c') returns 2 + partitions.get_index(1, 'foo', 'a') returns 0 + + Parameters + ---------- + level : int + The nesting level of the partition we are observing + name : str + The partition name + key : str or int + The partition value + """ + if level == len(self.levels): + if name in self.partition_names: + raise ValueError('{} was the name of the partition in ' + 'another level'.format(name)) + + part_set = PartitionSet(name) + self.levels.append(part_set) + self.partition_names.add(name) + + return self.levels[level].get_index(key) + + def filter_accepts_partition(self, part_key, filter, level): + p_column, p_value_index = part_key + f_column, op, f_value = filter + if p_column != f_column: + return True + + f_type = type(f_value) + + if op in {'in', 'not in'}: + if not isinstance(f_value, Collection): + raise TypeError( + "'%s' object is not a collection", f_type.__name__) + if not f_value: + raise ValueError("Cannot use empty collection as filter value") + if len({type(item) for item in f_value}) != 1: + raise ValueError("All elements of the collection '%s' must be" + " of same type", f_value) + f_type = type(next(iter(f_value))) + + elif not isinstance(f_value, str) and isinstance(f_value, Collection): + raise ValueError( + "Op '%s' not supported with a collection value", op) + + p_value = f_type(self.levels[level] + .dictionary[p_value_index].as_py()) + + if op == "=" or op == "==": + return p_value == f_value + elif op == "!=": + return p_value != f_value + elif op == '<': + return p_value < f_value + elif op == '>': + return p_value > f_value + elif op == '<=': + return p_value <= f_value + elif op == '>=': + return p_value >= f_value + elif op == 'in': + return p_value in f_value + elif op == 'not in': + return p_value not in f_value + else: + raise ValueError("'%s' is not a valid operator in predicates.", + filter[1]) + + +class ParquetManifest: + + def __init__(self, dirpath, open_file_func=None, filesystem=None, + pathsep='/', partition_scheme='hive', metadata_nthreads=1): + filesystem, dirpath = _get_filesystem_and_path(filesystem, dirpath) + self.filesystem = filesystem + self.open_file_func = open_file_func + self.pathsep = pathsep + self.dirpath = _stringify_path(dirpath) + self.partition_scheme = partition_scheme + self.partitions = ParquetPartitions() + self.pieces = [] + self._metadata_nthreads = metadata_nthreads + self._thread_pool = futures.ThreadPoolExecutor( + max_workers=metadata_nthreads) + + self.common_metadata_path = None + self.metadata_path = None + + self._visit_level(0, self.dirpath, []) + + # Due to concurrency, pieces will potentially by out of order if the + # dataset is partitioned so we sort them to yield stable results + self.pieces.sort(key=lambda piece: piece.path) + + if self.common_metadata_path is None: + # _common_metadata is a subset of _metadata + self.common_metadata_path = self.metadata_path + + self._thread_pool.shutdown() + + def _visit_level(self, level, base_path, part_keys): + fs = self.filesystem + + _, directories, files = next(fs.walk(base_path)) + + filtered_files = [] + for path in files: + full_path = self.pathsep.join((base_path, path)) + if path.endswith('_common_metadata'): + self.common_metadata_path = full_path + elif path.endswith('_metadata'): + self.metadata_path = full_path + elif self._should_silently_exclude(path): + continue + else: + filtered_files.append(full_path) 
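+        # The loop above records the dataset-level sidecar files
+        # (_metadata and _common_metadata) separately and skips checksum
+        # and hidden files, so filtered_files now holds only data files.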
+ + # ARROW-1079: Filter out "private" directories starting with underscore + filtered_directories = [self.pathsep.join((base_path, x)) + for x in directories + if not _is_private_directory(x)] + + filtered_files.sort() + filtered_directories.sort() + + if len(filtered_files) > 0 and len(filtered_directories) > 0: + raise ValueError('Found files in an intermediate ' + 'directory: {}'.format(base_path)) + elif len(filtered_directories) > 0: + self._visit_directories(level, filtered_directories, part_keys) + else: + self._push_pieces(filtered_files, part_keys) + + def _should_silently_exclude(self, file_name): + return (file_name.endswith('.crc') or # Checksums + file_name.endswith('_$folder$') or # HDFS directories in S3 + file_name.startswith('.') or # Hidden files starting with . + file_name.startswith('_') or # Hidden files starting with _ + file_name in EXCLUDED_PARQUET_PATHS) + + def _visit_directories(self, level, directories, part_keys): + futures_list = [] + for path in directories: + head, tail = _path_split(path, self.pathsep) + name, key = _parse_hive_partition(tail) + + index = self.partitions.get_index(level, name, key) + dir_part_keys = part_keys + [(name, index)] + # If you have less threads than levels, the wait call will block + # indefinitely due to multiple waits within a thread. + if level < self._metadata_nthreads: + future = self._thread_pool.submit(self._visit_level, + level + 1, + path, + dir_part_keys) + futures_list.append(future) + else: + self._visit_level(level + 1, path, dir_part_keys) + if futures_list: + futures.wait(futures_list) + + def _parse_partition(self, dirname): + if self.partition_scheme == 'hive': + return _parse_hive_partition(dirname) + else: + raise NotImplementedError('partition schema: {}' + .format(self.partition_scheme)) + + def _push_pieces(self, files, part_keys): + self.pieces.extend([ + ParquetDatasetPiece._create(path, partition_keys=part_keys, + open_file_func=self.open_file_func) + for path in files + ]) + + +def _parse_hive_partition(value): + if '=' not in value: + raise ValueError('Directory name did not appear to be a ' + 'partition: {}'.format(value)) + return value.split('=', 1) + + +def _is_private_directory(x): + _, tail = os.path.split(x) + return (tail.startswith('_') or tail.startswith('.')) and '=' not in tail + + +def _path_split(path, sep): + i = path.rfind(sep) + 1 + head, tail = path[:i], path[i:] + head = head.rstrip(sep) + return head, tail + + +EXCLUDED_PARQUET_PATHS = {'_SUCCESS'} + + +class _ParquetDatasetMetadata: + __slots__ = ('fs', 'memory_map', 'read_dictionary', 'common_metadata', + 'buffer_size') + + +def _open_dataset_file(dataset, path, meta=None): + if (dataset.fs is not None and + not isinstance(dataset.fs, legacyfs.LocalFileSystem)): + path = dataset.fs.open(path, mode='rb') + return ParquetFile( + path, + metadata=meta, + memory_map=dataset.memory_map, + read_dictionary=dataset.read_dictionary, + common_metadata=dataset.common_metadata, + buffer_size=dataset.buffer_size + ) + + +_DEPR_MSG = ( + "'{}' attribute is deprecated as of pyarrow 5.0.0 and will be removed " + "in a future version.{}" +) + + +_read_docstring_common = """\ +read_dictionary : list, default None + List of names or column paths (for nested types) to read directly + as DictionaryArray. Only supported for BYTE_ARRAY storage. To read + a flat column as dictionary-encoded pass the column name. For + nested types, you must pass the full column "path", which could be + something like level1.level2.list.item. 
Refer to the Parquet + file's schema to obtain the paths. +memory_map : bool, default False + If the source is a file path, use a memory map to read file, which can + improve performance in some environments. +buffer_size : int, default 0 + If positive, perform read buffering when deserializing individual + column chunks. Otherwise IO calls are unbuffered. +partitioning : pyarrow.dataset.Partitioning or str or list of str, \ +default "hive" + The partitioning scheme for a partitioned dataset. The default of "hive" + assumes directory names with key=value pairs like "/year=2009/month=11". + In addition, a scheme like "/2009/11" is also supported, in which case + you need to specify the field names or a full schema. See the + ``pyarrow.dataset.partitioning()`` function for more details.""" + +_parquet_dataset_example = """\ +Generate an example PyArrow Table and write it to a partitioned dataset: + +>>> import pyarrow as pa +>>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], +... 'n_legs': [2, 2, 4, 4, 5, 100], +... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", +... "Brittle stars", "Centipede"]}) + +>>> import pyarrow.parquet as pq +>>> pq.write_to_dataset(table, root_path='dataset_name', +... partition_cols=['year'], +... use_legacy_dataset=False) + +create a ParquetDataset object from the dataset source: + +>>> dataset = pq.ParquetDataset('dataset_name/', use_legacy_dataset=False) + +and read the data: + +>>> dataset.read().to_pandas() + n_legs animal year +0 5 Brittle stars 2019 +1 2 Flamingo 2020 +2 4 Dog 2021 +3 100 Centipede 2021 +4 2 Parrot 2022 +5 4 Horse 2022 + +create a ParquetDataset object with filter: + +>>> dataset = pq.ParquetDataset('dataset_name/', use_legacy_dataset=False, +... filters=[('n_legs','=',4)]) +>>> dataset.read().to_pandas() + n_legs animal year +0 4 Dog 2021 +1 4 Horse 2022 +""" + + +class ParquetDataset: + + __doc__ = """ +Encapsulates details of reading a complete Parquet dataset possibly +consisting of multiple files and partitions in subdirectories. + +Parameters +---------- +path_or_paths : str or List[str] + A directory name, single file name, or list of file names. +filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. +metadata : pyarrow.parquet.FileMetaData + Use metadata obtained elsewhere to validate file schemas. +schema : pyarrow.parquet.Schema + Use schema obtained elsewhere to validate file schemas. Alternative to + metadata parameter. +split_row_groups : bool, default False + Divide files into pieces for each row group in the file. +validate_schema : bool, default True + Check that individual file schemas are all the same / compatible. +filters : List[Tuple] or List[List[Tuple]] or None (default) + Rows which do not match the filter predicate will be removed from scanned + data. Partition keys embedded in a nested directory structure will be + exploited to avoid loading files at all if they contain no matching rows. + If `use_legacy_dataset` is True, filters can only reference partition + keys and only a hive-style directory structure is supported. When + setting `use_legacy_dataset` to False, also within-file level filtering + and different partitioning schemes are supported. + + {1} +metadata_nthreads : int, default 1 + How many threads to allow the thread pool which is used to read the + dataset metadata. 
Increasing this is helpful to read partitioned + datasets. +{0} +use_legacy_dataset : bool, default True + Set to False to enable the new code path (using the + new Arrow Dataset API). Among other things, this allows to pass + `filters` for all columns and not only the partition keys, enables + different partitioning schemes, etc. +pre_buffer : bool, default True + Coalesce and issue file reads in parallel to improve performance on + high-latency filesystems (e.g. S3). If True, Arrow will use a + background I/O thread pool. This option is only supported for + use_legacy_dataset=False. If using a filesystem layer that itself + performs readahead (e.g. fsspec's S3FS), disable readahead for best + results. +coerce_int96_timestamp_unit : str, default None. + Cast timestamps that are stored in INT96 format to a particular resolution + (e.g. 'ms'). Setting to None is equivalent to 'ns' and therefore INT96 + timestamps will be inferred as timestamps in nanoseconds. +thrift_string_size_limit : int, default None + If not None, override the maximum total string size allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. +thrift_container_size_limit : int, default None + If not None, override the maximum total size of containers allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + +Examples +-------- +{2} +""".format(_read_docstring_common, _DNF_filter_doc, _parquet_dataset_example) + + def __new__(cls, path_or_paths=None, filesystem=None, schema=None, + metadata=None, split_row_groups=False, validate_schema=True, + filters=None, metadata_nthreads=None, read_dictionary=None, + memory_map=False, buffer_size=0, partitioning="hive", + use_legacy_dataset=None, pre_buffer=True, + coerce_int96_timestamp_unit=None, + thrift_string_size_limit=None, + thrift_container_size_limit=None): + if use_legacy_dataset is None: + # if a new filesystem is passed -> default to new implementation + if isinstance(filesystem, FileSystem): + use_legacy_dataset = False + # otherwise the default is still True + else: + use_legacy_dataset = True + + if not use_legacy_dataset: + return _ParquetDatasetV2( + path_or_paths, filesystem=filesystem, + filters=filters, + partitioning=partitioning, + read_dictionary=read_dictionary, + memory_map=memory_map, + buffer_size=buffer_size, + pre_buffer=pre_buffer, + coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, + # unsupported keywords + schema=schema, metadata=metadata, + split_row_groups=split_row_groups, + validate_schema=validate_schema, + metadata_nthreads=metadata_nthreads, + thrift_string_size_limit=thrift_string_size_limit, + thrift_container_size_limit=thrift_container_size_limit, + ) + self = object.__new__(cls) + return self + + def __init__(self, path_or_paths, filesystem=None, schema=None, + metadata=None, split_row_groups=False, validate_schema=True, + filters=None, metadata_nthreads=None, read_dictionary=None, + memory_map=False, buffer_size=0, partitioning="hive", + use_legacy_dataset=True, pre_buffer=True, + coerce_int96_timestamp_unit=None, + thrift_string_size_limit=None, + thrift_container_size_limit=None): + if partitioning != "hive": + raise ValueError( + 'Only "hive" for hive-like partitioning is supported when ' + 'using use_legacy_dataset=True') + if metadata_nthreads is not None: + warnings.warn( + "Specifying the 'metadata_nthreads' argument is deprecated as " + "of pyarrow 8.0.0, and the argument will be removed in a " + "future version", + 
FutureWarning, stacklevel=2,
+            )
+        else:
+            metadata_nthreads = 1
+
+        self._ds_metadata = _ParquetDatasetMetadata()
+        a_path = path_or_paths
+        if isinstance(a_path, list):
+            a_path = a_path[0]
+
+        self._ds_metadata.fs, _ = _get_filesystem_and_path(filesystem, a_path)
+        if isinstance(path_or_paths, list):
+            self.paths = [_parse_uri(path) for path in path_or_paths]
+        else:
+            self.paths = _parse_uri(path_or_paths)
+
+        self._ds_metadata.read_dictionary = read_dictionary
+        self._ds_metadata.memory_map = memory_map
+        self._ds_metadata.buffer_size = buffer_size
+
+        (self._pieces,
+         self._partitions,
+         self._common_metadata_path,
+         self._metadata_path) = _make_manifest(
+             path_or_paths, self._fs, metadata_nthreads=metadata_nthreads,
+             open_file_func=partial(_open_dataset_file, self._ds_metadata)
+        )
+
+        if self._common_metadata_path is not None:
+            with self._fs.open(self._common_metadata_path) as f:
+                self._ds_metadata.common_metadata = read_metadata(
+                    f,
+                    memory_map=memory_map
+                )
+        else:
+            self._ds_metadata.common_metadata = None
+
+        if metadata is not None:
+            warnings.warn(
+                "Specifying the 'metadata' argument with 'use_legacy_dataset="
+                "True' is deprecated as of pyarrow 8.0.0.",
+                FutureWarning, stacklevel=2)
+
+        if metadata is None and self._metadata_path is not None:
+            with self._fs.open(self._metadata_path) as f:
+                self._metadata = read_metadata(f, memory_map=memory_map)
+        else:
+            self._metadata = metadata
+
+        if schema is not None:
+            warnings.warn(
+                "Specifying the 'schema' argument with 'use_legacy_dataset="
+                "True' is deprecated as of pyarrow 8.0.0. You can still "
+                "specify it in combination with 'use_legacy_dataset=False', "
+                "but in that case you need to specify a pyarrow.Schema "
+                "instead of a ParquetSchema.",
+                FutureWarning, stacklevel=2)
+        self._schema = schema
+
+        self.split_row_groups = split_row_groups
+
+        if split_row_groups:
+            raise NotImplementedError("split_row_groups not yet implemented")
+
+        if filters is not None:
+            filters = _check_filters(filters)
+            self._filter(filters)
+
+        if validate_schema:
+            self.validate_schemas()
+
+    def equals(self, other):
+        if not isinstance(other, ParquetDataset):
+            raise TypeError('`other` must be an instance of ParquetDataset')
+
+        if self._fs.__class__ != other._fs.__class__:
+            return False
+        for prop in ('paths', '_pieces', '_partitions',
+                     '_common_metadata_path', '_metadata_path',
+                     '_common_metadata', '_metadata', '_schema',
+                     'split_row_groups'):
+            if getattr(self, prop) != getattr(other, prop):
+                return False
+        for prop in ('memory_map', 'buffer_size'):
+            if (
+                getattr(self._ds_metadata, prop) !=
+                getattr(other._ds_metadata, prop)
+            ):
+                return False
+
+        return True
+
+    def __eq__(self, other):
+        try:
+            return self.equals(other)
+        except TypeError:
+            return NotImplemented
+
+    def validate_schemas(self):
+        if self._metadata is None and self._schema is None:
+            if self._common_metadata is not None:
+                self._schema = self._common_metadata.schema
+            else:
+                self._schema = self._pieces[0].get_metadata().schema
+        elif self._schema is None:
+            self._schema = self._metadata.schema
+
+        # Verify schemas are all compatible
+        dataset_schema = self._schema.to_arrow_schema()
+        # Exclude the partition columns from the schema, they are provided
+        # by the path, not the DatasetPiece
+        if self._partitions is not None:
+            for partition_name in self._partitions.partition_names:
+                if dataset_schema.get_field_index(partition_name) != -1:
+                    field_idx = dataset_schema.get_field_index(partition_name)
+                    dataset_schema = dataset_schema.remove(field_idx)
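+        # Each piece's file schema is compared below against this dataset
+        # schema (partition columns excluded), ignoring schema metadata.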
+ + for piece in self._pieces: + file_metadata = piece.get_metadata() + file_schema = file_metadata.schema.to_arrow_schema() + if not dataset_schema.equals(file_schema, check_metadata=False): + raise ValueError('Schema in {!s} was different. \n' + '{!s}\n\nvs\n\n{!s}' + .format(piece, file_schema, + dataset_schema)) + + def read(self, columns=None, use_threads=True, use_pandas_metadata=False): + """ + Read multiple Parquet files as a single pyarrow.Table. + + Parameters + ---------- + columns : List[str] + Names of columns to read from the file. + use_threads : bool, default True + Perform multi-threaded column reads + use_pandas_metadata : bool, default False + Passed through to each dataset piece. + + Returns + ------- + pyarrow.Table + Content of the file as a table (of columns). + + Examples + -------- + Generate an example dataset: + + >>> import pyarrow as pa + >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], + ... 'n_legs': [2, 2, 4, 4, 5, 100], + ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", + ... "Brittle stars", "Centipede"]}) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path='dataset_name_read', + ... partition_cols=['year'], + ... use_legacy_dataset=False) + >>> dataset = pq.ParquetDataset('dataset_name_read/', + ... use_legacy_dataset=False) + + Read multiple Parquet files as a single pyarrow.Table: + + >>> dataset.read(columns=["n_legs"]) + pyarrow.Table + n_legs: int64 + ---- + n_legs: [[5],[2],[4,100],[2,4]] + """ + tables = [] + for piece in self._pieces: + table = piece.read(columns=columns, + use_threads=use_threads, + partitions=self._partitions, + use_pandas_metadata=use_pandas_metadata) + tables.append(table) + + all_data = lib.concat_tables(tables) + + if use_pandas_metadata: + # We need to ensure that this metadata is set in the Table's schema + # so that Table.to_pandas will construct pandas.DataFrame with the + # right index + common_metadata = self._get_common_pandas_metadata() + current_metadata = all_data.schema.metadata or {} + + if common_metadata and b'pandas' not in current_metadata: + all_data = all_data.replace_schema_metadata({ + b'pandas': common_metadata}) + + return all_data + + def read_pandas(self, **kwargs): + """ + Read dataset including pandas metadata, if any. Other arguments passed + through to ParquetDataset.read, see docstring for further details. + + Parameters + ---------- + **kwargs : optional + All additional options to pass to the reader. + + Returns + ------- + pyarrow.Table + Content of the file as a table (of columns). + + Examples + -------- + Generate an example PyArrow Table and write it to a partitioned + dataset: + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame({'year': [2020, 2022, 2021, 2022, 2019, 2021], + ... 'n_legs': [2, 2, 4, 4, 5, 100], + ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", + ... "Brittle stars", "Centipede"]}) + >>> table = pa.Table.from_pandas(df) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, 'table.parquet') + >>> dataset = pq.ParquetDataset('table.parquet', + ... 
use_legacy_dataset=False) + + Read dataset including pandas metadata: + + >>> dataset.read_pandas(columns=["n_legs"]) + pyarrow.Table + n_legs: int64 + ---- + n_legs: [[2,2,4,4,5,100]] + + Select pandas metadata: + + >>> dataset.read_pandas(columns=["n_legs"]).schema.pandas_metadata + {'index_columns': [{'kind': 'range', 'name': None, 'start': 0, ...} + """ + return self.read(use_pandas_metadata=True, **kwargs) + + def _get_common_pandas_metadata(self): + if self._common_metadata is None: + return None + + keyvalues = self._common_metadata.metadata + return keyvalues.get(b'pandas', None) + + def _filter(self, filters): + accepts_filter = self._partitions.filter_accepts_partition + + def one_filter_accepts(piece, filter): + return all(accepts_filter(part_key, filter, level) + for level, part_key in enumerate(piece.partition_keys)) + + def all_filters_accept(piece): + return any(all(one_filter_accepts(piece, f) for f in conjunction) + for conjunction in filters) + + self._pieces = [p for p in self._pieces if all_filters_accept(p)] + + @property + def pieces(self): + """ + DEPRECATED + """ + warnings.warn( + _DEPR_MSG.format( + "ParquetDataset.pieces", + " Specify 'use_legacy_dataset=False' while constructing the " + "ParquetDataset, and then use the '.fragments' attribute " + "instead."), + FutureWarning, stacklevel=2) + return self._pieces + + @property + def partitions(self): + """ + DEPRECATED + """ + warnings.warn( + _DEPR_MSG.format( + "ParquetDataset.partitions", + " Specify 'use_legacy_dataset=False' while constructing the " + "ParquetDataset, and then use the '.partitioning' attribute " + "instead."), + FutureWarning, stacklevel=2) + return self._partitions + + @property + def schema(self): + warnings.warn( + _DEPR_MSG.format( + "ParquetDataset.schema", + " Specify 'use_legacy_dataset=False' while constructing the " + "ParquetDataset, and then use the '.schema' attribute " + "instead (which will return an Arrow schema instead of a " + "Parquet schema)."), + FutureWarning, stacklevel=2) + return self._schema + + @property + def memory_map(self): + """ + DEPRECATED + """ + warnings.warn( + _DEPR_MSG.format("ParquetDataset.memory_map", ""), + FutureWarning, stacklevel=2) + return self._ds_metadata.memory_map + + @property + def read_dictionary(self): + """ + DEPRECATED + """ + warnings.warn( + _DEPR_MSG.format("ParquetDataset.read_dictionary", ""), + FutureWarning, stacklevel=2) + return self._ds_metadata.read_dictionary + + @property + def buffer_size(self): + """ + DEPRECATED + """ + warnings.warn( + _DEPR_MSG.format("ParquetDataset.buffer_size", ""), + FutureWarning, stacklevel=2) + return self._ds_metadata.buffer_size + + _fs = property( + operator.attrgetter('_ds_metadata.fs') + ) + + @property + def fs(self): + """ + DEPRECATED + """ + warnings.warn( + _DEPR_MSG.format( + "ParquetDataset.fs", + " Specify 'use_legacy_dataset=False' while constructing the " + "ParquetDataset, and then use the '.filesystem' attribute " + "instead."), + FutureWarning, stacklevel=2) + return self._ds_metadata.fs + + @property + def metadata(self): + """ + DEPRECATED + """ + warnings.warn( + _DEPR_MSG.format("ParquetDataset.metadata", ""), + FutureWarning, stacklevel=2) + return self._metadata + + @property + def metadata_path(self): + """ + DEPRECATED + """ + warnings.warn( + _DEPR_MSG.format("ParquetDataset.metadata_path", ""), + FutureWarning, stacklevel=2) + return self._metadata_path + + @property + def common_metadata_path(self): + """ + DEPRECATED + """ + warnings.warn( + 
_DEPR_MSG.format("ParquetDataset.common_metadata_path", ""),
+            FutureWarning, stacklevel=2)
+        return self._common_metadata_path
+
+    _common_metadata = property(
+        operator.attrgetter('_ds_metadata.common_metadata')
+    )
+
+    @property
+    def common_metadata(self):
+        """
+        DEPRECATED
+        """
+        warnings.warn(
+            _DEPR_MSG.format("ParquetDataset.common_metadata", ""),
+            FutureWarning, stacklevel=2)
+        return self._ds_metadata.common_metadata
+
+    @property
+    def fragments(self):
+        """
+        A list of the Dataset source fragments or pieces with absolute
+        file paths. To use this property set 'use_legacy_dataset=False'
+        while constructing ParquetDataset object.
+
+        Examples
+        --------
+        Generate an example dataset:
+
+        >>> import pyarrow as pa
+        >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021],
+        ...                   'n_legs': [2, 2, 4, 4, 5, 100],
+        ...                   'animal': ["Flamingo", "Parrot", "Dog", "Horse",
+        ...                              "Brittle stars", "Centipede"]})
+        >>> import pyarrow.parquet as pq
+        >>> pq.write_to_dataset(table, root_path='dataset_name_fragments',
+        ...                     partition_cols=['year'],
+        ...                     use_legacy_dataset=False)
+        >>> dataset = pq.ParquetDataset('dataset_name_fragments/',
+        ...                             use_legacy_dataset=False)
+
+        List the fragments:
+
+        >>> dataset.fragments
+        [<pyarrow.dataset.ParquetFileFragment path=dataset_name_fragments/...
+        """
+        raise NotImplementedError(
+            "To use this property set 'use_legacy_dataset=False' while "
+            "constructing the ParquetDataset")
+
+    @property
+    def files(self):
+        """
+        A list of absolute Parquet file paths in the Dataset source.
+        To use this property set 'use_legacy_dataset=False'
+        while constructing ParquetDataset object.
+
+        Examples
+        --------
+        Generate an example dataset:
+
+        >>> import pyarrow as pa
+        >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021],
+        ...                   'n_legs': [2, 2, 4, 4, 5, 100],
+        ...                   'animal': ["Flamingo", "Parrot", "Dog", "Horse",
+        ...                              "Brittle stars", "Centipede"]})
+        >>> import pyarrow.parquet as pq
+        >>> pq.write_to_dataset(table, root_path='dataset_name_files',
+        ...                     partition_cols=['year'],
+        ...                     use_legacy_dataset=False)
+        >>> dataset = pq.ParquetDataset('dataset_name_files/',
+        ...                             use_legacy_dataset=False)
+
+        List the files:
+
+        >>> dataset.files
+        ['dataset_name_files/year=2019/...-0.parquet', ...
+        """
+        raise NotImplementedError(
+            "To use this property set 'use_legacy_dataset=False' while "
+            "constructing the ParquetDataset")
+
+    @property
+    def filesystem(self):
+        """
+        The filesystem type of the Dataset source.
+        To use this property set 'use_legacy_dataset=False'
+        while constructing ParquetDataset object.
+        """
+        raise NotImplementedError(
+            "To use this property set 'use_legacy_dataset=False' while "
+            "constructing the ParquetDataset")
+
+    @property
+    def partitioning(self):
+        """
+        The partitioning of the Dataset source, if discovered.
+        To use this property set 'use_legacy_dataset=False'
+        while constructing ParquetDataset object.
+ """ + raise NotImplementedError( + "To use this property set 'use_legacy_dataset=False' while " + "constructing the ParquetDataset") + + +def _make_manifest(path_or_paths, fs, pathsep='/', metadata_nthreads=1, + open_file_func=None): + partitions = None + common_metadata_path = None + metadata_path = None + + if isinstance(path_or_paths, list) and len(path_or_paths) == 1: + # Dask passes a directory as a list of length 1 + path_or_paths = path_or_paths[0] + + if _is_path_like(path_or_paths) and fs.isdir(path_or_paths): + manifest = ParquetManifest(path_or_paths, filesystem=fs, + open_file_func=open_file_func, + pathsep=getattr(fs, "pathsep", "/"), + metadata_nthreads=metadata_nthreads) + common_metadata_path = manifest.common_metadata_path + metadata_path = manifest.metadata_path + pieces = manifest.pieces + partitions = manifest.partitions + else: + if not isinstance(path_or_paths, list): + path_or_paths = [path_or_paths] + + # List of paths + if len(path_or_paths) == 0: + raise ValueError('Must pass at least one file path') + + pieces = [] + for path in path_or_paths: + if not fs.isfile(path): + raise OSError('Passed non-file path: {}' + .format(path)) + piece = ParquetDatasetPiece._create( + path, open_file_func=open_file_func) + pieces.append(piece) + + return pieces, partitions, common_metadata_path, metadata_path + + +def _is_local_file_system(fs): + return isinstance(fs, LocalFileSystem) or isinstance( + fs, legacyfs.LocalFileSystem + ) + + +class _ParquetDatasetV2: + """ + ParquetDataset shim using the Dataset API under the hood. + + Examples + -------- + Generate an example PyArrow Table and write it to a partitioned dataset: + + >>> import pyarrow as pa + >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], + ... 'n_legs': [2, 2, 4, 4, 5, 100], + ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", + ... "Brittle stars", "Centipede"]}) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path='dataset_v2', + ... partition_cols=['year'], + ... use_legacy_dataset=False) + + create a ParquetDataset object from the dataset source: + + >>> dataset = pq.ParquetDataset('dataset_v2/', use_legacy_dataset=False) + + and read the data: + + >>> dataset.read().to_pandas() + n_legs animal year + 0 5 Brittle stars 2019 + 1 2 Flamingo 2020 + 2 4 Dog 2021 + 3 100 Centipede 2021 + 4 2 Parrot 2022 + 5 4 Horse 2022 + + create a ParquetDataset object with filter: + + >>> dataset = pq.ParquetDataset('dataset_v2/', + ... filters=[('n_legs','=',4)], + ... 
use_legacy_dataset=False) + >>> dataset.read().to_pandas() + n_legs animal year + 0 4 Dog 2021 + 1 4 Horse 2022 + """ + + def __init__(self, path_or_paths, filesystem=None, *, filters=None, + partitioning="hive", read_dictionary=None, buffer_size=None, + memory_map=False, ignore_prefixes=None, pre_buffer=True, + coerce_int96_timestamp_unit=None, schema=None, + decryption_properties=None, thrift_string_size_limit=None, + thrift_container_size_limit=None, + **kwargs): + import pyarrow.dataset as ds + + # Raise error for not supported keywords + for keyword, default in [ + ("metadata", None), ("split_row_groups", False), + ("validate_schema", True), ("metadata_nthreads", None)]: + if keyword in kwargs and kwargs[keyword] is not default: + raise ValueError( + "Keyword '{0}' is not yet supported with the new " + "Dataset API".format(keyword)) + + # map format arguments + read_options = { + "pre_buffer": pre_buffer, + "coerce_int96_timestamp_unit": coerce_int96_timestamp_unit, + "thrift_string_size_limit": thrift_string_size_limit, + "thrift_container_size_limit": thrift_container_size_limit, + } + if buffer_size: + read_options.update(use_buffered_stream=True, + buffer_size=buffer_size) + if read_dictionary is not None: + read_options.update(dictionary_columns=read_dictionary) + + if decryption_properties is not None: + read_options.update(decryption_properties=decryption_properties) + + # map filters to Expressions + self._filters = filters + self._filter_expression = filters and _filters_to_expression(filters) + + # map old filesystems to new one + if filesystem is not None: + filesystem = _ensure_filesystem( + filesystem, use_mmap=memory_map) + elif filesystem is None and memory_map: + # if memory_map is specified, assume local file system (string + # path can in principle be URI for any filesystem) + filesystem = LocalFileSystem(use_mmap=memory_map) + + # This needs to be checked after _ensure_filesystem, because that + # handles the case of an fsspec LocalFileSystem + if ( + hasattr(path_or_paths, "__fspath__") and + filesystem is not None and + not _is_local_file_system(filesystem) + ): + raise TypeError( + "Path-like objects with __fspath__ must only be used with " + f"local file systems, not {type(filesystem)}" + ) + + # check for single fragment dataset + single_file = None + if not isinstance(path_or_paths, list): + if _is_path_like(path_or_paths): + path_or_paths = _stringify_path(path_or_paths) + if filesystem is None: + # path might be a URI describing the FileSystem as well + try: + filesystem, path_or_paths = FileSystem.from_uri( + path_or_paths) + except ValueError: + filesystem = LocalFileSystem(use_mmap=memory_map) + if filesystem.get_file_info(path_or_paths).is_file: + single_file = path_or_paths + else: + single_file = path_or_paths + + parquet_format = ds.ParquetFileFormat(**read_options) + + if single_file is not None: + fragment = parquet_format.make_fragment(single_file, filesystem) + + self._dataset = ds.FileSystemDataset( + [fragment], schema=schema or fragment.physical_schema, + format=parquet_format, + filesystem=fragment.filesystem + ) + return + + # check partitioning to enable dictionary encoding + if partitioning == "hive": + partitioning = ds.HivePartitioning.discover( + infer_dictionary=True) + + self._dataset = ds.dataset(path_or_paths, filesystem=filesystem, + schema=schema, format=parquet_format, + partitioning=partitioning, + ignore_prefixes=ignore_prefixes) + + @property + def schema(self): + """ + Schema of the Dataset. 
+ + Examples + -------- + Generate an example dataset: + + >>> import pyarrow as pa + >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], + ... 'n_legs': [2, 2, 4, 4, 5, 100], + ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", + ... "Brittle stars", "Centipede"]}) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path='dataset_v2_schema', + ... partition_cols=['year'], + ... use_legacy_dataset=False) + >>> dataset = pq.ParquetDataset('dataset_v2_schema/', + ... use_legacy_dataset=False) + + Read the schema: + + >>> dataset.schema + n_legs: int64 + animal: string + year: dictionary + """ + return self._dataset.schema + + def read(self, columns=None, use_threads=True, use_pandas_metadata=False): + """ + Read (multiple) Parquet files as a single pyarrow.Table. + + Parameters + ---------- + columns : List[str] + Names of columns to read from the dataset. The partition fields + are not automatically included (in contrast to when setting + ``use_legacy_dataset=True``). + use_threads : bool, default True + Perform multi-threaded column reads. + use_pandas_metadata : bool, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded. + + Returns + ------- + pyarrow.Table + Content of the file as a table (of columns). + + Examples + -------- + Generate an example dataset: + + >>> import pyarrow as pa + >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], + ... 'n_legs': [2, 2, 4, 4, 5, 100], + ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", + ... "Brittle stars", "Centipede"]}) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path='dataset_v2_read', + ... partition_cols=['year'], + ... use_legacy_dataset=False) + >>> dataset = pq.ParquetDataset('dataset_v2_read/', + ... use_legacy_dataset=False) + + Read the dataset: + + >>> dataset.read(columns=["n_legs"]) + pyarrow.Table + n_legs: int64 + ---- + n_legs: [[5],[2],[4,100],[2,4]] + """ + # if use_pandas_metadata, we need to include index columns in the + # column selection, to be able to restore those in the pandas DataFrame + metadata = self.schema.metadata + if columns is not None and use_pandas_metadata: + if metadata and b'pandas' in metadata: + # RangeIndex can be represented as dict instead of column name + index_columns = [ + col for col in _get_pandas_index_columns(metadata) + if not isinstance(col, dict) + ] + columns = ( + list(columns) + list(set(index_columns) - set(columns)) + ) + + table = self._dataset.to_table( + columns=columns, filter=self._filter_expression, + use_threads=use_threads + ) + + # if use_pandas_metadata, restore the pandas metadata (which gets + # lost if doing a specific `columns` selection in to_table) + if use_pandas_metadata: + if metadata and b"pandas" in metadata: + new_metadata = table.schema.metadata or {} + new_metadata.update({b"pandas": metadata[b"pandas"]}) + table = table.replace_schema_metadata(new_metadata) + + return table + + def read_pandas(self, **kwargs): + """ + Read dataset including pandas metadata, if any. Other arguments passed + through to ParquetDataset.read, see docstring for further details. + + Examples + -------- + Generate an example parquet file: + + >>> import pyarrow as pa + >>> import pandas as pd + >>> df = pd.DataFrame({'year': [2020, 2022, 2021, 2022, 2019, 2021], + ... 'n_legs': [2, 2, 4, 4, 5, 100], + ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", + ... 
"Brittle stars", "Centipede"]}) + >>> table = pa.Table.from_pandas(df) + >>> import pyarrow.parquet as pq + >>> pq.write_table(table, 'table_V2.parquet') + >>> dataset = pq.ParquetDataset('table_V2.parquet', + ... use_legacy_dataset=False) + + Read the dataset with pandas metadata: + + >>> dataset.read_pandas(columns=["n_legs"]) + pyarrow.Table + n_legs: int64 + ---- + n_legs: [[2,2,4,4,5,100]] + + >>> dataset.read_pandas(columns=["n_legs"]).schema.pandas_metadata + {'index_columns': [{'kind': 'range', 'name': None, 'start': 0, ...} + """ + return self.read(use_pandas_metadata=True, **kwargs) + + @property + def pieces(self): + warnings.warn( + _DEPR_MSG.format("ParquetDataset.pieces", + " Use the '.fragments' attribute instead"), + FutureWarning, stacklevel=2) + return list(self._dataset.get_fragments()) + + @property + def fragments(self): + """ + A list of the Dataset source fragments or pieces with absolute + file paths. + + Examples + -------- + Generate an example dataset: + + >>> import pyarrow as pa + >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], + ... 'n_legs': [2, 2, 4, 4, 5, 100], + ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", + ... "Brittle stars", "Centipede"]}) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path='dataset_v2_fragments', + ... partition_cols=['year'], + ... use_legacy_dataset=False) + >>> dataset = pq.ParquetDataset('dataset_v2_fragments/', + ... use_legacy_dataset=False) + + List the fragments: + + >>> dataset.fragments + [>> import pyarrow as pa + >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], + ... 'n_legs': [2, 2, 4, 4, 5, 100], + ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", + ... "Brittle stars", "Centipede"]}) + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path='dataset_v2_files', + ... partition_cols=['year'], + ... use_legacy_dataset=False) + >>> dataset = pq.ParquetDataset('dataset_v2_files/', + ... use_legacy_dataset=False) + + List the files: + + >>> dataset.files + ['dataset_v2_files/year=2019/...-0.parquet', ... + """ + return self._dataset.files + + @property + def filesystem(self): + """ + The filesystem type of the Dataset source. + """ + return self._dataset.filesystem + + @property + def partitioning(self): + """ + The partitioning of the Dataset source, if discovered. + """ + return self._dataset.partitioning + + +_read_table_docstring = """ +{0} + +Parameters +---------- +source : str, pyarrow.NativeFile, or file-like object + If a string passed, can be a single file name or directory name. For + file-like objects, only read a single file. Use pyarrow.BufferReader to + read a file contained in a bytes or buffer-like object. +columns : list + If not None, only these columns will be read from the file. A column + name may be a prefix of a nested field, e.g. 'a' will select 'a.b', + 'a.c', and 'a.d.e'. If empty, no columns will be read. Note + that the table will still have the correct num_rows set despite having + no columns. +use_threads : bool, default True + Perform multi-threaded column reads. +metadata : FileMetaData + If separately computed +schema : Schema, optional + Optionally provide the Schema for the parquet dataset, in which case it + will not be inferred from the source. +{1} +use_legacy_dataset : bool, default False + By default, `read_table` uses the new Arrow Datasets API since + pyarrow 1.0.0. 
Among other things, this allows to pass `filters` + for all columns and not only the partition keys, enables + different partitioning schemes, etc. + Set to True to use the legacy behaviour (this option is deprecated, + and the legacy implementation will be removed in a future version). +ignore_prefixes : list, optional + Files matching any of these prefixes will be ignored by the + discovery process if use_legacy_dataset=False. + This is matched to the basename of a path. + By default this is ['.', '_']. + Note that discovery happens only if a directory is passed as source. +filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. +filters : List[Tuple] or List[List[Tuple]] or None (default) + Rows which do not match the filter predicate will be removed from scanned + data. Partition keys embedded in a nested directory structure will be + exploited to avoid loading files at all if they contain no matching rows. + If `use_legacy_dataset` is True, filters can only reference partition + keys and only a hive-style directory structure is supported. When + setting `use_legacy_dataset` to False, also within-file level filtering + and different partitioning schemes are supported. + + {3} +pre_buffer : bool, default True + Coalesce and issue file reads in parallel to improve performance on + high-latency filesystems (e.g. S3). If True, Arrow will use a + background I/O thread pool. This option is only supported for + use_legacy_dataset=False. If using a filesystem layer that itself + performs readahead (e.g. fsspec's S3FS), disable readahead for best + results. +coerce_int96_timestamp_unit : str, default None. + Cast timestamps that are stored in INT96 format to a particular + resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' + and therefore INT96 timestamps will be inferred as timestamps + in nanoseconds. +decryption_properties : FileDecryptionProperties or None + File-level decryption properties. + The decryption properties can be created using + ``CryptoFactory.file_decryption_properties()``. +thrift_string_size_limit : int, default None + If not None, override the maximum total string size allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. +thrift_container_size_limit : int, default None + If not None, override the maximum total size of containers allocated + when decoding Thrift structures. The default limit should be + sufficient for most Parquet files. + +Returns +------- +{2} + +{4} +""" + +_read_table_example = """\ + +Examples +-------- + +Generate an example PyArrow Table and write it to a partitioned dataset: + +>>> import pyarrow as pa +>>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], +... 'n_legs': [2, 2, 4, 4, 5, 100], +... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", +... "Brittle stars", "Centipede"]}) +>>> import pyarrow.parquet as pq +>>> pq.write_to_dataset(table, root_path='dataset_name_2', +... 
partition_cols=['year']) + +Read the data: + +>>> pq.read_table('dataset_name_2').to_pandas() + n_legs animal year +0 5 Brittle stars 2019 +1 2 Flamingo 2020 +2 4 Dog 2021 +3 100 Centipede 2021 +4 2 Parrot 2022 +5 4 Horse 2022 + + +Read only a subset of columns: + +>>> pq.read_table('dataset_name_2', columns=["n_legs", "animal"]) +pyarrow.Table +n_legs: int64 +animal: string +---- +n_legs: [[5],[2],[4,100],[2,4]] +animal: [["Brittle stars"],["Flamingo"],["Dog","Centipede"],["Parrot","Horse"]] + +Read a subset of columns and read one column as DictionaryArray: + +>>> pq.read_table('dataset_name_2', columns=["n_legs", "animal"], +... read_dictionary=["animal"]) +pyarrow.Table +n_legs: int64 +animal: dictionary +---- +n_legs: [[5],[2],[4,100],[2,4]] +animal: [ -- dictionary: +["Brittle stars"] -- indices: +[0], -- dictionary: +["Flamingo"] -- indices: +[0], -- dictionary: +["Dog","Centipede"] -- indices: +[0,1], -- dictionary: +["Parrot","Horse"] -- indices: +[0,1]] + +Read the table with filter: + +>>> pq.read_table('dataset_name_2', columns=["n_legs", "animal"], +... filters=[('n_legs','<',4)]).to_pandas() + n_legs animal +0 2 Flamingo +1 2 Parrot + +Read data from a single Parquet file: + +>>> pq.write_table(table, 'example.parquet') +>>> pq.read_table('dataset_name_2').to_pandas() + n_legs animal year +0 5 Brittle stars 2019 +1 2 Flamingo 2020 +2 4 Dog 2021 +3 100 Centipede 2021 +4 2 Parrot 2022 +5 4 Horse 2022 +""" + + +def read_table(source, *, columns=None, use_threads=True, metadata=None, + schema=None, use_pandas_metadata=False, memory_map=False, + read_dictionary=None, filesystem=None, filters=None, + buffer_size=0, partitioning="hive", use_legacy_dataset=False, + ignore_prefixes=None, pre_buffer=True, + coerce_int96_timestamp_unit=None, + decryption_properties=None, thrift_string_size_limit=None, + thrift_container_size_limit=None): + if not use_legacy_dataset: + if metadata is not None: + raise ValueError( + "The 'metadata' keyword is no longer supported with the new " + "datasets-based implementation. Specify " + "'use_legacy_dataset=True' to temporarily recover the old " + "behaviour." 
+ ) + try: + dataset = _ParquetDatasetV2( + source, + schema=schema, + filesystem=filesystem, + partitioning=partitioning, + memory_map=memory_map, + read_dictionary=read_dictionary, + buffer_size=buffer_size, + filters=filters, + ignore_prefixes=ignore_prefixes, + pre_buffer=pre_buffer, + coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, + thrift_string_size_limit=thrift_string_size_limit, + thrift_container_size_limit=thrift_container_size_limit, + ) + except ImportError: + # fall back on ParquetFile for simple cases when pyarrow.dataset + # module is not available + if filters is not None: + raise ValueError( + "the 'filters' keyword is not supported when the " + "pyarrow.dataset module is not available" + ) + if partitioning != "hive": + raise ValueError( + "the 'partitioning' keyword is not supported when the " + "pyarrow.dataset module is not available" + ) + if schema is not None: + raise ValueError( + "the 'schema' argument is not supported when the " + "pyarrow.dataset module is not available" + ) + filesystem, path = _resolve_filesystem_and_path(source, filesystem) + if filesystem is not None: + source = filesystem.open_input_file(path) + # TODO test that source is not a directory or a list + dataset = ParquetFile( + source, metadata=metadata, read_dictionary=read_dictionary, + memory_map=memory_map, buffer_size=buffer_size, + pre_buffer=pre_buffer, + coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, + decryption_properties=decryption_properties, + thrift_string_size_limit=thrift_string_size_limit, + thrift_container_size_limit=thrift_container_size_limit, + ) + + return dataset.read(columns=columns, use_threads=use_threads, + use_pandas_metadata=use_pandas_metadata) + + warnings.warn( + "Passing 'use_legacy_dataset=True' to get the legacy behaviour is " + "deprecated as of pyarrow 8.0.0, and the legacy implementation will " + "be removed in a future version.", + FutureWarning, stacklevel=2) + + if ignore_prefixes is not None: + raise ValueError( + "The 'ignore_prefixes' keyword is only supported when " + "use_legacy_dataset=False") + + if schema is not None: + raise ValueError( + "The 'schema' argument is only supported when " + "use_legacy_dataset=False") + + if _is_path_like(source): + pf = ParquetDataset( + source, metadata=metadata, memory_map=memory_map, + read_dictionary=read_dictionary, + buffer_size=buffer_size, + filesystem=filesystem, filters=filters, + partitioning=partitioning, + coerce_int96_timestamp_unit=coerce_int96_timestamp_unit + ) + else: + pf = ParquetFile( + source, metadata=metadata, + read_dictionary=read_dictionary, + memory_map=memory_map, + buffer_size=buffer_size, + coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, + decryption_properties=decryption_properties + ) + return pf.read(columns=columns, use_threads=use_threads, + use_pandas_metadata=use_pandas_metadata) + + +read_table.__doc__ = _read_table_docstring.format( + """Read a Table from Parquet format + +Note: starting with pyarrow 1.0, the default for `use_legacy_dataset` is +switched to False.""", + "\n".join((_read_docstring_common, + """use_pandas_metadata : bool, default False + If True and file has custom pandas schema metadata, ensure that + index columns are also loaded.""")), + """pyarrow.Table + Content of the file as a table (of columns)""", + _DNF_filter_doc, _read_table_example) + + +def read_pandas(source, columns=None, **kwargs): + return read_table( + source, columns=columns, use_pandas_metadata=True, **kwargs + ) + + +read_pandas.__doc__ = 
_read_table_docstring.format( + 'Read a Table from Parquet format, also reading DataFrame\n' + 'index values if known in the file metadata', + "\n".join((_read_docstring_common, + """**kwargs + additional options for :func:`read_table`""")), + """pyarrow.Table + Content of the file as a Table of Columns, including DataFrame + indexes as columns""", + _DNF_filter_doc, "") + + +def write_table(table, where, row_group_size=None, version='2.4', + use_dictionary=True, compression='snappy', + write_statistics=True, + use_deprecated_int96_timestamps=None, + coerce_timestamps=None, + allow_truncated_timestamps=False, + data_page_size=None, flavor=None, + filesystem=None, + compression_level=None, + use_byte_stream_split=False, + column_encoding=None, + data_page_version='1.0', + use_compliant_nested_type=False, + encryption_properties=None, + write_batch_size=None, + dictionary_pagesize_limit=None, + **kwargs): + row_group_size = kwargs.pop('chunk_size', row_group_size) + use_int96 = use_deprecated_int96_timestamps + try: + with ParquetWriter( + where, table.schema, + filesystem=filesystem, + version=version, + flavor=flavor, + use_dictionary=use_dictionary, + write_statistics=write_statistics, + coerce_timestamps=coerce_timestamps, + data_page_size=data_page_size, + allow_truncated_timestamps=allow_truncated_timestamps, + compression=compression, + use_deprecated_int96_timestamps=use_int96, + compression_level=compression_level, + use_byte_stream_split=use_byte_stream_split, + column_encoding=column_encoding, + data_page_version=data_page_version, + use_compliant_nested_type=use_compliant_nested_type, + encryption_properties=encryption_properties, + write_batch_size=write_batch_size, + dictionary_pagesize_limit=dictionary_pagesize_limit, + **kwargs) as writer: + writer.write_table(table, row_group_size=row_group_size) + except Exception: + if _is_path_like(where): + try: + os.remove(_stringify_path(where)) + except os.error: + pass + raise + + +_write_table_example = """\ +Generate an example PyArrow Table: + +>>> import pyarrow as pa +>>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], +... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", +... "Brittle stars", "Centipede"]}) + +and write the Table into Parquet file: + +>>> import pyarrow.parquet as pq +>>> pq.write_table(table, 'example.parquet') + +Defining row group size for the Parquet file: + +>>> pq.write_table(table, 'example.parquet', row_group_size=3) + +Defining row group compression (default is Snappy): + +>>> pq.write_table(table, 'example.parquet', compression='none') + +Defining row group compression and encoding per-column: + +>>> pq.write_table(table, 'example.parquet', +... compression={'n_legs': 'snappy', 'animal': 'gzip'}, +... use_dictionary=['n_legs', 'animal']) + +Defining column encoding per-column: + +>>> pq.write_table(table, 'example.parquet', +... column_encoding={'animal':'PLAIN'}, +... use_dictionary=False) +""" + +write_table.__doc__ = """ +Write a Table to Parquet format. + +Parameters +---------- +table : pyarrow.Table +where : string or pyarrow.NativeFile +row_group_size : int + Maximum size of each written row group. If None, the + row group size will be the minimum of the Table size + and 64 * 1024 * 1024. 
+{} +**kwargs : optional + Additional options for ParquetWriter + +Examples +-------- +{} +""".format(_parquet_writer_arg_docs, _write_table_example) + + +def _mkdir_if_not_exists(fs, path): + if fs._isfilestore() and not fs.exists(path): + try: + fs.mkdir(path) + except OSError: + assert fs.exists(path) + + +def write_to_dataset(table, root_path, partition_cols=None, + partition_filename_cb=None, filesystem=None, + use_legacy_dataset=None, schema=None, + partitioning=None, basename_template=None, + use_threads=None, file_visitor=None, + existing_data_behavior=None, + **kwargs): + """Wrapper around dataset.write_dataset (when use_legacy_dataset=False) or + parquet.write_table (when use_legacy_dataset=True) for writing a Table to + Parquet format by partitions. + For each combination of partition columns and values, + subdirectories are created in the following + manner: + + root_dir/ + group1=value1 + group2=value1 + .parquet + group2=value2 + .parquet + group1=valueN + group2=value1 + .parquet + group2=valueN + .parquet + + Parameters + ---------- + table : pyarrow.Table + root_path : str, pathlib.Path + The root directory of the dataset + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + partition_cols : list, + Column names by which to partition the dataset. + Columns are partitioned in the order they are given + partition_filename_cb : callable, + A callback function that takes the partition key(s) as an argument + and allows you to override the partition filename. If nothing is + passed, the filename will consist of a uuid. + This option is only supported for use_legacy_dataset=True. + When use_legacy_dataset=None and this option is specified, + use_legacy_dataset will be set to True. + use_legacy_dataset : bool + Default is False. Set to True to use the legacy behaviour + (this option is deprecated, and the legacy implementation will be + removed in a future version). The legacy implementation still + supports the `partition_filename_cb` keyword but is less efficient + when using partition columns. + use_threads : bool, default True + Write files in parallel. If enabled, the maximum level of parallelism + is determined by the number of available CPU cores. + This option is only supported for use_legacy_dataset=False. + schema : Schema, optional + This option is only supported for use_legacy_dataset=False. + partitioning : Partitioning or list[str], optional + The partitioning scheme specified with the + ``pyarrow.dataset.partitioning()`` function or a list of field names. + When providing a list of field names, you can use + ``partitioning_flavor`` to drive which partitioning type should be + used. + This option is only supported for use_legacy_dataset=False. + basename_template : str, optional + A template string used to generate basenames of written data files. + The token '{i}' will be replaced with an automatically incremented + integer. If not specified, it defaults to "guid-{i}.parquet". + This option is only supported for use_legacy_dataset=False. + file_visitor : function + If set, this function will be called with a WrittenFile instance + for each file created during the call. This object will have both + a path attribute and a metadata attribute. + + The path attribute will be a string containing the path to + the created file. + + The metadata attribute will be the parquet metadata of the file.
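A short sketch of the two equivalent ways to request hive-style partitioning described in the parameters above (illustrative, not part of the patch; dataset paths are made up). The non-legacy implementation further below turns partition_cols into a hive Partitioning built from the selected columns' schema, so both calls should yield the same year=... directory layout:

    import pyarrow as pa
    import pyarrow.dataset as ds
    import pyarrow.parquet as pq

    table = pa.table({'year': [2020, 2021, 2021],
                      'n_legs': [2, 4, 100]})

    # 1) Let write_to_dataset derive hive partitioning from column names.
    pq.write_to_dataset(table, root_path='dataset_a', partition_cols=['year'])

    # 2) Pass an explicit Partitioning object instead (mutually exclusive
    #    with partition_cols).
    part = ds.partitioning(table.select(['year']).schema, flavor='hive')
    pq.write_to_dataset(table, root_path='dataset_b', partitioning=part)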
+ This metadata will have the file path attribute set and can be used + to build a _metadata file. The metadata attribute will be None if + the format is not parquet. + + Example visitor which simply collects the filenames created:: + + visited_paths = [] + + def file_visitor(written_file): + visited_paths.append(written_file.path) + This option is only supported for use_legacy_dataset=False. + existing_data_behavior : 'overwrite_or_ignore' | 'error' | \ +'delete_matching' + Controls how the dataset will handle data that already exists in + the destination. The default behaviour is 'overwrite_or_ignore'. + + 'overwrite_or_ignore' will ignore any existing data and will + overwrite files with the same name as an output file. Other + existing files will be ignored. This behavior, in combination + with a unique basename_template for each write, will allow for + an append workflow. + + 'error' will raise an error if any data exists in the destination. + + 'delete_matching' is useful when you are writing a partitioned + dataset. The first time each partition directory is encountered + the entire directory will be deleted. This allows you to overwrite + old partitions completely. + This option is only supported for use_legacy_dataset=False. + **kwargs : dict, + When use_legacy_dataset=False, used as additional kwargs for + `dataset.write_dataset` function (passed to + `ParquetFileFormat.make_write_options`). See the docstring + of `write_table` for the available options. + When use_legacy_dataset=True, used as additional kwargs for + `parquet.write_table` function (See docstring for `write_table` + or `ParquetWriter` for more information). + Using `metadata_collector` in kwargs allows one to collect the + file metadata instances of dataset pieces. The file paths in the + ColumnChunkMetaData will be set relative to `root_path`. + + Examples + -------- + Generate an example PyArrow Table: + + >>> import pyarrow as pa + >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], + ... 'n_legs': [2, 2, 4, 4, 5, 100], + ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", + ... "Brittle stars", "Centipede"]}) + + and write it to a partitioned dataset: + + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset(table, root_path='dataset_name_3', + ... partition_cols=['year']) + >>> pq.ParquetDataset('dataset_name_3', use_legacy_dataset=False).files + ['dataset_name_3/year=2019/...-0.parquet', ... + + Write a single Parquet file into the root folder: + + >>> pq.write_to_dataset(table, root_path='dataset_name_4') + >>> pq.ParquetDataset('dataset_name_4/', use_legacy_dataset=False).files + ['dataset_name_4/...-0.parquet'] + """ + # Choose the implementation + if use_legacy_dataset is None: + # if partition_filename_cb is specified -> + # default to the old implementation + if partition_filename_cb: + use_legacy_dataset = True + # otherwise the default is False + else: + use_legacy_dataset = False + + # Check for conflicting keywords + msg_confl_0 = ( + "The '{0}' argument is not supported by use_legacy_dataset={2}. " + "Use only '{1}' instead." + ) + msg_confl_1 = ( + "The '{1}' argument is not supported by use_legacy_dataset={2}. " + "Use only '{0}' instead."
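To make the append workflow mentioned under existing_data_behavior concrete, a sketch follows (illustrative, not part of the patch; the helper name and paths are made up). Each call uses a unique basename_template so that the default 'overwrite_or_ignore' behaviour keeps files from earlier calls, while 'delete_matching' would instead clear each partition directory the first time it is touched:

    import uuid
    import pyarrow as pa
    import pyarrow.parquet as pq

    def append_batch(table, root):
        # A unique template per call avoids clobbering earlier files.
        template = 'part-{}-{{i}}.parquet'.format(uuid.uuid4().hex)
        pq.write_to_dataset(table, root_path=root,
                            partition_cols=['year'],
                            basename_template=template,
                            existing_data_behavior='overwrite_or_ignore')

    append_batch(pa.table({'year': [2020, 2020], 'n_legs': [2, 4]}),
                 'dataset_append')
    append_batch(pa.table({'year': [2020, 2021], 'n_legs': [5, 100]}),
                 'dataset_append')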
+ ) + msg_confl = msg_confl_0 if use_legacy_dataset else msg_confl_1 + if partition_filename_cb is not None and basename_template is not None: + raise ValueError(msg_confl.format("basename_template", + "partition_filename_cb", + use_legacy_dataset)) + + if partition_cols is not None and partitioning is not None: + raise ValueError(msg_confl.format("partitioning", + "partition_cols", + use_legacy_dataset)) + + metadata_collector = kwargs.pop('metadata_collector', None) + if metadata_collector is not None and file_visitor is not None: + raise ValueError(msg_confl.format("file_visitor", + "metadata_collector", + use_legacy_dataset)) + + # New dataset implementation + if not use_legacy_dataset: + import pyarrow.dataset as ds + + # extract non-file format options + schema = kwargs.pop("schema", None) + use_threads = kwargs.pop("use_threads", True) + chunk_size = kwargs.pop("chunk_size", None) + row_group_size = kwargs.pop("row_group_size", None) + + row_group_size = ( + row_group_size if row_group_size is not None else chunk_size + ) + + # raise for unsupported keywords + msg = ( + "The '{}' argument is not supported with the new dataset " + "implementation." + ) + + if metadata_collector is not None: + def file_visitor(written_file): + metadata_collector.append(written_file.metadata) + if partition_filename_cb is not None: + raise ValueError(msg.format("partition_filename_cb")) + + # map format arguments + parquet_format = ds.ParquetFileFormat() + write_options = parquet_format.make_write_options(**kwargs) + + # map old filesystems to new one + if filesystem is not None: + filesystem = _ensure_filesystem(filesystem) + + if partition_cols: + part_schema = table.select(partition_cols).schema + partitioning = ds.partitioning(part_schema, flavor="hive") + + if basename_template is None: + basename_template = guid() + '-{i}.parquet' + + if existing_data_behavior is None: + existing_data_behavior = 'overwrite_or_ignore' + + ds.write_dataset( + table, root_path, filesystem=filesystem, + format=parquet_format, file_options=write_options, schema=schema, + partitioning=partitioning, use_threads=use_threads, + file_visitor=file_visitor, + basename_template=basename_template, + existing_data_behavior=existing_data_behavior, + max_rows_per_group=row_group_size) + return + + # warnings and errors when using legacy implementation + if use_legacy_dataset: + warnings.warn( + "Passing 'use_legacy_dataset=True' to get the legacy behaviour is " + "deprecated as of pyarrow 8.0.0, and the legacy implementation " + "will be removed in a future version.", + FutureWarning, stacklevel=2) + msg2 = ( + "The '{}' argument is not supported with the legacy " + "implementation. To use this argument specify " + "'use_legacy_dataset=False' while constructing the " + "ParquetDataset." + ) + if schema is not None: + raise ValueError(msg2.format("schema")) + if partitioning is not None: + raise ValueError(msg2.format("partitioning")) + if use_threads is not None: + raise ValueError(msg2.format("use_threads")) + if file_visitor is not None: + raise ValueError(msg2.format("file_visitor")) + if existing_data_behavior is not None: + raise ValueError(msg2.format("existing_data_behavior")) + if basename_template is not None: + raise ValueError(msg2.format("basename_template")) + if partition_filename_cb is not None: + warnings.warn( + _DEPR_MSG.format("partition_filename_cb", " Specify " + "'use_legacy_dataset=False' while constructing " + "the ParquetDataset, and then use the " + "'basename_template' parameter instead. 
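For readers tracing the non-legacy branch above, this is roughly the pyarrow.dataset call it assembles after the keyword mapping (a simplified paraphrase, not the actual implementation; 'dataset_c' and the column name are made up, and max_rows_per_group is omitted because row_group_size defaults to None):

    import uuid
    import pyarrow as pa
    import pyarrow.dataset as ds

    table = pa.table({'year': [2020, 2021], 'n_legs': [2, 4]})

    parquet_format = ds.ParquetFileFormat()
    write_options = parquet_format.make_write_options()  # remaining **kwargs end up here
    partitioning = ds.partitioning(table.select(['year']).schema, flavor='hive')

    ds.write_dataset(table, 'dataset_c',
                     format=parquet_format,
                     file_options=write_options,
                     partitioning=partitioning,
                     basename_template=uuid.uuid4().hex + '-{i}.parquet',
                     existing_data_behavior='overwrite_or_ignore',
                     use_threads=True)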
For " + "usage see `pyarrow.dataset.write_dataset`"), + FutureWarning, stacklevel=2) + + # Legacy implementation + fs, root_path = legacyfs.resolve_filesystem_and_path(root_path, filesystem) + + _mkdir_if_not_exists(fs, root_path) + + if partition_cols is not None and len(partition_cols) > 0: + df = table.to_pandas() + partition_keys = [df[col] for col in partition_cols] + data_df = df.drop(partition_cols, axis='columns') + data_cols = df.columns.drop(partition_cols) + if len(data_cols) == 0: + raise ValueError('No data left to save outside partition columns') + + subschema = table.schema + + # ARROW-2891: Ensure the output_schema is preserved when writing a + # partitioned dataset + for col in table.schema.names: + if col in partition_cols: + subschema = subschema.remove(subschema.get_field_index(col)) + + for keys, subgroup in data_df.groupby(partition_keys): + if not isinstance(keys, tuple): + keys = (keys,) + subdir = '/'.join( + ['{colname}={value}'.format(colname=name, value=val) + for name, val in zip(partition_cols, keys)]) + subtable = pa.Table.from_pandas(subgroup, schema=subschema, + safe=False) + _mkdir_if_not_exists(fs, '/'.join([root_path, subdir])) + if partition_filename_cb: + outfile = partition_filename_cb(keys) + else: + outfile = guid() + '.parquet' + relative_path = '/'.join([subdir, outfile]) + full_path = '/'.join([root_path, relative_path]) + with fs.open(full_path, 'wb') as f: + write_table(subtable, f, metadata_collector=metadata_collector, + **kwargs) + if metadata_collector is not None: + metadata_collector[-1].set_file_path(relative_path) + else: + if partition_filename_cb: + outfile = partition_filename_cb(None) + else: + outfile = guid() + '.parquet' + full_path = '/'.join([root_path, outfile]) + with fs.open(full_path, 'wb') as f: + write_table(table, f, metadata_collector=metadata_collector, + **kwargs) + if metadata_collector is not None: + metadata_collector[-1].set_file_path(outfile) + + +def write_metadata(schema, where, metadata_collector=None, **kwargs): + """ + Write metadata-only Parquet file from schema. This can be used with + `write_to_dataset` to generate `_common_metadata` and `_metadata` sidecar + files. + + Parameters + ---------- + schema : pyarrow.Schema + where : string or pyarrow.NativeFile + metadata_collector : list + where to collect metadata information. + **kwargs : dict, + Additional kwargs for ParquetWriter class. See docstring for + `ParquetWriter` for more information. + + Examples + -------- + Generate example data: + + >>> import pyarrow as pa + >>> table = pa.table({'n_legs': [2, 2, 4, 4, 5, 100], + ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", + ... "Brittle stars", "Centipede"]}) + + Write a dataset and collect metadata information. + + >>> metadata_collector = [] + >>> import pyarrow.parquet as pq + >>> pq.write_to_dataset( + ... table, 'dataset_metadata', + ... metadata_collector=metadata_collector) + + Write the `_common_metadata` parquet file without row groups statistics. + + >>> pq.write_metadata( + ... table.schema, 'dataset_metadata/_common_metadata') + + Write the `_metadata` parquet file with row groups statistics. + + >>> pq.write_metadata( + ... table.schema, 'dataset_metadata/_metadata', + ... metadata_collector=metadata_collector) + """ + writer = ParquetWriter(where, schema, **kwargs) + writer.close() + + if metadata_collector is not None: + # ParquetWriter doesn't expose the metadata until it's written. Write + # it and read it again. 
+ metadata = read_metadata(where) + for m in metadata_collector: + metadata.append_row_groups(m) + metadata.write_metadata_file(where) + + +def read_metadata(where, memory_map=False, decryption_properties=None, + filesystem=None): + """ + Read FileMetaData from footer of a single Parquet file. + + Parameters + ---------- + where : str (file path) or file-like object + memory_map : bool, default False + Create memory map when the source is a file path. + decryption_properties : FileDecryptionProperties, default None + Decryption properties for reading encrypted Parquet files. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + + Returns + ------- + metadata : FileMetaData + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.parquet as pq + >>> table = pa.table({'n_legs': [4, 5, 100], + ... 'animal': ["Dog", "Brittle stars", "Centipede"]}) + >>> pq.write_table(table, 'example.parquet') + + >>> pq.read_metadata('example.parquet') + + created_by: parquet-cpp-arrow version ... + num_columns: 2 + num_rows: 3 + num_row_groups: 1 + format_version: 2.6 + serialized_size: ... + """ + filesystem, where = _resolve_filesystem_and_path(where, filesystem) + file_ctx = nullcontext() + if filesystem is not None: + file_ctx = where = filesystem.open_input_file(where) + + with file_ctx: + file = ParquetFile(where, memory_map=memory_map, + decryption_properties=decryption_properties) + return file.metadata + + +def read_schema(where, memory_map=False, decryption_properties=None, + filesystem=None): + """ + Read effective Arrow schema from Parquet file metadata. + + Parameters + ---------- + where : str (file path) or file-like object + memory_map : bool, default False + Create memory map when the source is a file path. + decryption_properties : FileDecryptionProperties, default None + Decryption properties for reading encrypted Parquet files. + filesystem : FileSystem, default None + If nothing passed, will be inferred based on path. + Path will try to be found in the local on-disk filesystem otherwise + it will be parsed as an URI to determine the filesystem. + + Returns + ------- + schema : pyarrow.Schema + + Examples + -------- + >>> import pyarrow as pa + >>> import pyarrow.parquet as pq + >>> table = pa.table({'n_legs': [4, 5, 100], + ... 'animal': ["Dog", "Brittle stars", "Centipede"]}) + >>> pq.write_table(table, 'example.parquet') + + >>> pq.read_schema('example.parquet') + n_legs: int64 + animal: string + """ + filesystem, where = _resolve_filesystem_and_path(where, filesystem) + file_ctx = nullcontext() + if filesystem is not None: + file_ctx = where = filesystem.open_input_file(where) + + with file_ctx: + file = ParquetFile( + where, memory_map=memory_map, + decryption_properties=decryption_properties) + return file.schema.to_arrow_schema() + + +# re-export everything +# std `from . 
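Both metadata helpers above resolve the filesystem before opening the file, so one can be passed explicitly; a short sketch (illustrative, not part of the patch; LocalFileSystem stands in for a remote filesystem such as S3FileSystem, and the path is made up):

    import os
    import pyarrow as pa
    import pyarrow.parquet as pq
    from pyarrow import fs

    path = os.path.abspath('example.parquet')
    pq.write_table(pa.table({'n_legs': [4, 5, 100]}), path)

    local = fs.LocalFileSystem()
    # With a filesystem given, the path is opened via open_input_file(),
    # as in the implementation above.
    md = pq.read_metadata(path, filesystem=local)
    schema = pq.read_schema(path, filesystem=local)
    print(md.num_rows, schema.names)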
import *` ignores symbols with leading `_` +__all__ = list(sys.modules[__name__].__dict__) diff --git a/cpp/src/arrow/python/ArrowPythonConfig.cmake.in b/python/pyarrow/src/ArrowPythonConfig.cmake.in similarity index 100% rename from cpp/src/arrow/python/ArrowPythonConfig.cmake.in rename to python/pyarrow/src/ArrowPythonConfig.cmake.in diff --git a/cpp/src/arrow/python/ArrowPythonFlightConfig.cmake.in b/python/pyarrow/src/ArrowPythonFlightConfig.cmake.in similarity index 100% rename from cpp/src/arrow/python/ArrowPythonFlightConfig.cmake.in rename to python/pyarrow/src/ArrowPythonFlightConfig.cmake.in diff --git a/python/pyarrow/src/CMakeLists.txt b/python/pyarrow/src/CMakeLists.txt new file mode 100644 index 0000000000000..178b156a5c0a1 --- /dev/null +++ b/python/pyarrow/src/CMakeLists.txt @@ -0,0 +1,457 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# +# arrow_python +# + +cmake_minimum_required(VERSION 3.5) + +# RPATH settings on macOS do not affect install_name. +# https://cmake.org/cmake/help/latest/policy/CMP0068.html +if(POLICY CMP0068) + cmake_policy(SET CMP0068 NEW) +endif() + +# +# Define +# CMAKE_MODULE_PATH: location of cmake_modules in python +# + +get_filename_component(PYARROW_SOURCE_DIR ${CMAKE_SOURCE_DIR} DIRECTORY) +get_filename_component(PYTHON_SOURCE_DIR ${PYARROW_SOURCE_DIR} DIRECTORY) +get_filename_component(ARROW_SOURCE_DIR ${PYTHON_SOURCE_DIR} DIRECTORY) +set(ARROW_CPP_SOURCE_DIR "${ARROW_SOURCE_DIR}/cpp") + +# normalize ARROW_HOME path +file(TO_CMAKE_PATH "$ENV{ARROW_HOME}" ARROW_HOME) +set(CMAKE_MODULE_PATH "${PYTHON_SOURCE_DIR}/cmake_modules" "${ARROW_HOME}/lib/cmake/arrow") + +# +# Arrow version +# + +set(ARROW_PYTHON_VERSION "10.0.0-SNAPSHOT") +string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_PYTHON_BASE_VERSION "${ARROW_PYTHON_VERSION}") +# Need to set to ARRROW_VERSION before finding Arrow package! 
+project(arrow_python VERSION "${ARROW_PYTHON_BASE_VERSION}") + +if(NOT DEFINED CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif() + +# +# Arrow +# + +find_package(Arrow REQUIRED) +include(ArrowOptions) + +# +# Python +# +# Use the first Python installation on PATH, not the newest one +set(Python3_FIND_STRATEGY "LOCATION") +# On Windows, use registry last, not first +set(Python3_FIND_REGISTRY "LAST") +# On macOS, use framework last, not first +set(Python3_FIND_FRAMEWORK "LAST") + +find_package(Python3Alt 3.7 REQUIRED) +include_directories(SYSTEM ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS} ${ARROW_INCLUDE_DIR} src) + +add_custom_target(arrow_python-all) +add_custom_target(arrow_python) +add_custom_target(arrow_python-tests) +add_dependencies(arrow_python-all arrow_python arrow_python-tests) + +set(ARROW_PYTHON_SRCS + arrow_to_pandas.cc + benchmark.cc + common.cc + datetime.cc + decimal.cc + deserialize.cc + extension_type.cc + gdb.cc + helpers.cc + inference.cc + init.cc + io.cc + ipc.cc + numpy_convert.cc + numpy_to_arrow.cc + python_to_arrow.cc + pyarrow.cc + serialize.cc + udf.cc) + +set_source_files_properties(init.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON + SKIP_UNITY_BUILD_INCLUSION ON) + +# +# Arrow vs PyArrow cpp options +# + +# Check that all the options from Arrow and PyArrow cpp are in line +if(PYARROW_WITH_DATASET) + find_package(ArrowDataset REQUIRED) +endif() + +if(PYARROW_WITH_PARQUET_ENCRYPTION) + if(PARQUET_REQUIRE_ENCRYPTION) + list(APPEND ARROW_PYTHON_SRCS parquet_encryption.cc) + find_package(Parquet REQUIRED) + else() + message(FATAL_ERROR "You must build Arrow C++ with PARQUET_REQUIRE_ENCRYPTION=ON") + endif() +endif() + +if(PYARROW_WITH_HDFS) + if(NOT ARROW_HDFS) + message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON") + endif() +endif() + +# Check for only Arrow C++ options +if(ARROW_CSV) + list(APPEND ARROW_PYTHON_SRCS csv.cc) +endif() + +if(ARROW_FILESYSTEM) + list(APPEND ARROW_PYTHON_SRCS filesystem.cc) +endif() + +# Link to arrow dependencies +if(ARROW_BUILD_SHARED) + set(ARROW_PYTHON_DEPENDENCIES arrow_shared) +else() + set(THREADS_PREFER_PTHREAD_FLAG ON) + find_package(Threads REQUIRED) + set(ARROW_PYTHON_DEPENDENCIES arrow_static Threads::Threads) +endif() + +if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + set_property(SOURCE pyarrow.cc + APPEND_STRING + PROPERTY COMPILE_FLAGS " -Wno-cast-qual ") +endif() + +# +# Compiler stuff +# + +include(GNUInstallDirs) + +# This ensures that things like gnu++11 get passed correctly +if(NOT DEFINED CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 11) +endif() + +# We require a C++11 compliant compiler +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# Needed compiler flags +include(SetupCxxFlags) + +# +# Shared/static link libs +# + +set(ARROW_PYTHON_SHARED_LINK_LIBS arrow_shared) +set(ARROW_PYTHON_SHARED_PRIVATE_LINK_LIBS) +set(ARROW_PYTHON_STATIC_LINK_LIBS ${PYTHON_OTHER_LIBS}) + +if(WIN32) + list(APPEND ARROW_PYTHON_SHARED_LINK_LIBS ${PYTHON_LIBRARIES} ${PYTHON_OTHER_LIBS}) +endif() + +if(PARQUET_REQUIRE_ENCRYPTION AND PYARROW_WITH_PARQUET_ENCRYPTION) + list(APPEND ARROW_PYTHON_SHARED_LINK_LIBS parquet_shared) +endif() + +set(ARROW_PYTHON_INCLUDES ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS}) + +# Include macros needed to find and use the add_arrow_lib function +include(BuildUtils) +include(CMakePackageConfigHelpers) + +# Set the output directory for cmake module +# (CMAKE_INSTALL_PREFIX = python/build/dist! should be set in setup.py!)
+set(ARROW_CMAKE_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") + +# Changing ARROW_SOURCE_DIR for sdist build +# In this case cpp/cmake_modules doesn't exist +if(NOT EXISTS "${ARROW_SOURCE_DIR}/cpp/cmake_modules/Find${MODULE}.cmake") + set(ARROW_SOURCE_DIR ${PYTHON_SOURCE_DIR}) +endif() + +add_arrow_lib(arrow_python + CMAKE_PACKAGE_NAME + ArrowPython + PKG_CONFIG_NAME + arrow-python + SOURCES + ${ARROW_PYTHON_SRCS} + PRECOMPILED_HEADERS + "$<$:pch.h>" + OUTPUTS + ARROW_PYTHON_LIBRARIES + DEPENDENCIES + ${ARROW_PYTHON_DEPENDENCIES} + SHARED_LINK_FLAGS + ${ARROW_VERSION_SCRIPT_FLAGS} + SHARED_LINK_LIBS + ${ARROW_PYTHON_SHARED_LINK_LIBS} + SHARED_PRIVATE_LINK_LIBS + ${ARROW_PYTHON_SHARED_PRIVATE_LINK_LIBS} + STATIC_LINK_LIBS + ${ARROW_PYTHON_STATIC_LINK_LIBS} + EXTRA_INCLUDES + "${ARROW_PYTHON_INCLUDES}") + +add_dependencies(arrow_python ${ARROW_PYTHON_LIBRARIES}) + +foreach(LIB_TARGET ${ARROW_PYTHON_LIBRARIES}) + target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_PYTHON_EXPORTING) +endforeach() + +if(ARROW_BUILD_STATIC AND MSVC) + target_compile_definitions(arrow_python_static PUBLIC ARROW_STATIC) +endif() + +if(ARROW_FLIGHT AND ARROW_BUILD_SHARED) + # Must link to shared libarrow_flight: we don't want to link more than one + # copy of gRPC into the eventual Cython shared object, otherwise gRPC calls + # fail with weird errors due to multiple copies of global static state (The + # other solution is to link gRPC shared everywhere instead of statically only + # in Flight) + find_package(ArrowFlight REQUIRED) + + set(FLIGHT_LINK_LIBS arrow_flight_shared) + + add_arrow_lib(arrow_python_flight + CMAKE_PACKAGE_NAME + ArrowPythonFlight + PKG_CONFIG_NAME + arrow-python-flight + SOURCES + flight.cc + OUTPUTS + ARROW_PYFLIGHT_LIBRARIES + SHARED_LINK_FLAGS + ${ARROW_VERSION_SCRIPT_FLAGS} + SHARED_LINK_LIBS + arrow_python_shared + arrow_flight_shared + STATIC_LINK_LIBS + ${PYTHON_OTHER_LIBS} + EXTRA_INCLUDES + "${ARROW_PYTHON_INCLUDES}" + PRIVATE_INCLUDES + "${Protobuf_INCLUDE_DIRS}") + + add_dependencies(arrow_python ${ARROW_PYFLIGHT_LIBRARIES}) + + foreach(LIB_TARGET ${ARROW_PYFLIGHT_LIBRARIES}) + target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_PYFLIGHT_EXPORTING) + endforeach() + + if(ARROW_BUILD_STATIC AND MSVC) + target_compile_definitions(arrow_python_flight_static PUBLIC ARROW_STATIC) + endif() +endif() + +if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + # Clang, be quiet. 
Python C API has lots of macros + set_property(SOURCE ${ARROW_PYTHON_SRCS} + APPEND_STRING + PROPERTY COMPILE_FLAGS -Wno-parentheses-equality) +endif() + +arrow_install_all_headers("arrow/python") + +# ---------------------------------------------------------------------- + +# +# Tests +# The tests will be moved to Cython and are currently supported for bundled GTest +# Follow-up: https://issues.apache.org/jira/browse/ARROW-17016 +# + +if(ARROW_BUILD_TESTS) + + enable_testing() + set(GTEST_ROOT ${ARROW_CPP_SOURCE_DIR}/${ARROW_BUILD_DIR}/googletest_ep-prefix) + + # GTest must be built from source + if(EXISTS ${ARROW_CPP_SOURCE_DIR}/${ARROW_BUILD_DIR}/googletest_ep-prefix) + + # Set necessary paths for cmake to find GTest + set(GTEST_INCLUDE_DIR "${GTEST_ROOT}/include") + set(GTEST_LIBRARY ${GTEST_ROOT}/lib) + set(GTEST_MAIN_LIBRARY ${GTEST_ROOT}/lib) + + # + # Taken from Matlab CMakeLists.txt (enable_gtest and build_gtest) + # + + set(ARROW_GTEST_PREFIX "${GTEST_ROOT}") + set(ARROW_GTEST_MAIN_PREFIX "${GTEST_ROOT}") + + if(WIN32) + set(ARROW_GTEST_SHARED_LIB_DIR "${ARROW_GTEST_PREFIX}/bin") + set(ARROW_GTEST_MAIN_SHARED_LIB_DIR "${ARROW_GTEST_MAIN_PREFIX}/bin") + + set(ARROW_GTEST_LINK_LIB_DIR "${ARROW_GTEST_PREFIX}/lib") + set(ARROW_GTEST_LINK_LIB + "${ARROW_GTEST_LINK_LIB_DIR}/${CMAKE_IMPORT_LIBRARY_PREFIX}gtestd${CMAKE_IMPORT_LIBRARY_SUFFIX}" + ) + + set(ARROW_GTEST_MAIN_LINK_LIB_DIR "${ARROW_GTEST_MAIN_PREFIX}/lib") + set(ARROW_GTEST_MAIN_LINK_LIB + "${ARROW_GTEST_MAIN_LINK_LIB_DIR}/${CMAKE_IMPORT_LIBRARY_PREFIX}gtest_maind${CMAKE_IMPORT_LIBRARY_SUFFIX}" + ) + else() + set(ARROW_GTEST_SHARED_LIB_DIR "${ARROW_GTEST_PREFIX}/lib") + set(ARROW_GTEST_MAIN_SHARED_LIB_DIR "${ARROW_GTEST_MAIN_PREFIX}/lib") + endif() + + set(ARROW_GTEST_INCLUDE_DIR "${ARROW_GTEST_PREFIX}/include") + set(ARROW_GTEST_SHARED_LIB + "${ARROW_GTEST_SHARED_LIB_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtestd${CMAKE_SHARED_LIBRARY_SUFFIX}" + ) + + set(ARROW_GTEST_MAIN_INCLUDE_DIR "${ARROW_GTEST_MAIN_PREFIX}/include") + set(ARROW_GTEST_MAIN_SHARED_LIB + "${ARROW_GTEST_MAIN_SHARED_LIB_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest_maind${CMAKE_SHARED_LIBRARY_SUFFIX}" + ) + + file(MAKE_DIRECTORY "${ARROW_GTEST_INCLUDE_DIR}") + + # Create target GTest::gtest + add_library(GTest::gtest SHARED IMPORTED) + set_target_properties(GTest::gtest + PROPERTIES IMPORTED_LOCATION ${ARROW_GTEST_SHARED_LIB} + INTERFACE_INCLUDE_DIRECTORIES + ${ARROW_GTEST_INCLUDE_DIR}) + if(WIN32) + set_target_properties(GTest::gtest PROPERTIES IMPORTED_IMPLIB ${ARROW_GTEST_LINK_LIB}) + endif() + + # ArrowTesting + # needed to be able to use arrow_testing_shared target + find_package(ArrowTesting REQUIRED) + + add_custom_target(all-tests) + + add_library(arrow_python_test_main STATIC util/test_main.cc) + + target_link_libraries(arrow_python_test_main GTest::gtest) + target_include_directories(arrow_python_test_main SYSTEM + PUBLIC ${ARROW_PYTHON_INCLUDES}) + + # Link libraries to avoid include error on Linux + if(ARROW_TEST_LINKAGE STREQUAL shared) + target_link_libraries(arrow_python_test_main arrow_shared) + else() + target_link_libraries(arrow_python_test_main arrow_static) + endif() + + if(APPLE) + target_link_libraries(arrow_python_test_main ${CMAKE_DL_LIBS}) + set_target_properties(arrow_python_test_main PROPERTIES LINK_FLAGS + "-undefined dynamic_lookup") + elseif(NOT MSVC) + target_link_libraries(arrow_python_test_main pthread ${CMAKE_DL_LIBS}) + endif() + + if(ARROW_TEST_LINKAGE STREQUAL shared) + set(ARROW_PYTHON_TEST_LINK_LIBS arrow_python_test_main 
arrow_python_shared + arrow_testing_shared arrow_shared) + else() + set(ARROW_PYTHON_TEST_LINK_LIBS arrow_python_test_main arrow_python_static + arrow_testing_static arrow_static) + endif() + + # + # Add a test case + # + + set(REL_TEST_NAME "python_test") + get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE) + set(TEST_NAME "arrow-${TEST_NAME}") + set(SOURCES "${REL_TEST_NAME}.cc") + + # Make sure the executable name contains only hyphens, not underscores + string(REPLACE "_" "-" TEST_NAME ${TEST_NAME}) + + set(TEST_PATH "${CMAKE_BINARY_DIR}/${TEST_NAME}") + add_executable(${TEST_NAME} ${SOURCES}) + + # We need to set the correct RPATH so that dependencies can be found + set_target_properties(${TEST_NAME} + PROPERTIES BUILD_WITH_INSTALL_RPATH TRUE + INSTALL_RPATH_USE_LINK_PATH TRUE + INSTALL_RPATH + "${PYTHON_SOURCE_DIR}/pyarrow;$ENV{CONDA_PREFIX}/lib") + + # Customize link libraries + target_link_libraries(${TEST_NAME} PRIVATE "${ARROW_PYTHON_TEST_LINK_LIBS}") + # Extra link libs + target_link_libraries(${TEST_NAME} PRIVATE ${PYTHON_LIBRARIES}) + # Extra includes + target_include_directories(${TEST_NAME} SYSTEM PUBLIC "${ARROW_PYTHON_INCLUDES}") + + # Add the test + if(WIN32) + add_test(${TEST_NAME} ${TEST_PATH}) + else() + add_test(${TEST_NAME} + ${ARROW_CPP_SOURCE_DIR}/build-support/run-test.sh + ${CMAKE_BINARY_DIR} + test + ${TEST_PATH}) + endif() + + # Add test as dependency of relevant targets + add_dependencies(all-tests ${TEST_NAME}) + add_dependencies(arrow_python-tests ${TEST_NAME}) + + set(LABELS) + list(APPEND LABELS "unittest" arrow_python-tests) + + # ensure there is a cmake target which exercises tests with this LABEL + set(LABEL_TEST_NAME "test-arrow_python-tests") + if(NOT TARGET ${LABEL_TEST_NAME}) + add_custom_target(${LABEL_TEST_NAME} + ctest -L "${LABEL}" --output-on-failure + USES_TERMINAL) + endif() + # ensure the test is (re)built before the LABEL test runs + add_dependencies(${LABEL_TEST_NAME} ${TEST_NAME}) + + set_property(TEST ${TEST_NAME} + APPEND + PROPERTY LABELS ${LABELS}) + + else() + message(STATUS "Tests for PyArrow CPP not built") + message(STATUS "Set -DGTest_SOURCE=BUNDLED when building Arrow C++ + to enable building tests for PyArrow CPP") + endif() +endif() \ No newline at end of file diff --git a/experimental/computeir/Plan.fbs b/python/pyarrow/src/api.h similarity index 74% rename from experimental/computeir/Plan.fbs rename to python/pyarrow/src/api.h index 779974ac92586..28c46c95e9474 100644 --- a/experimental/computeir/Plan.fbs +++ b/python/pyarrow/src/api.h @@ -15,14 +15,16 @@ // specific language governing permissions and limitations // under the License. -include "Relation.fbs"; +#pragma once -namespace org.apache.arrow.computeir.flatbuf; - -/// A specification of a query. -table Plan { - /// One or more output relations.
- sinks: [Relation] (required); -} - -root_type Plan; +#include "arrow_to_pandas.h" +#include "common.h" +#include "datetime.h" +#include "deserialize.h" +#include "helpers.h" +#include "inference.h" +#include "io.h" +#include "numpy_convert.h" +#include "numpy_to_arrow.h" +#include "python_to_arrow.h" +#include "serialize.h" diff --git a/cpp/src/arrow/python/arrow-python-flight.pc.in b/python/pyarrow/src/arrow-python-flight.pc.in similarity index 100% rename from cpp/src/arrow/python/arrow-python-flight.pc.in rename to python/pyarrow/src/arrow-python-flight.pc.in diff --git a/cpp/src/arrow/python/arrow-python.pc.in b/python/pyarrow/src/arrow-python.pc.in similarity index 100% rename from cpp/src/arrow/python/arrow-python.pc.in rename to python/pyarrow/src/arrow-python.pc.in diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/python/pyarrow/src/arrow_to_pandas.cc similarity index 99% rename from cpp/src/arrow/python/arrow_to_pandas.cc rename to python/pyarrow/src/arrow_to_pandas.cc index 8f9d1cb45b983..437f0f11925ca 100644 --- a/cpp/src/arrow/python/arrow_to_pandas.cc +++ b/python/pyarrow/src/arrow_to_pandas.cc @@ -17,8 +17,8 @@ // Functions for pandas conversion via NumPy -#include "arrow/python/arrow_to_pandas.h" -#include "arrow/python/numpy_interop.h" // IWYU pragma: expand +#include "arrow_to_pandas.h" +#include "numpy_interop.h" // IWYU pragma: expand #include #include @@ -48,16 +48,16 @@ #include "arrow/compute/api.h" -#include "arrow/python/arrow_to_python_internal.h" -#include "arrow/python/common.h" -#include "arrow/python/datetime.h" -#include "arrow/python/decimal.h" -#include "arrow/python/helpers.h" -#include "arrow/python/numpy_convert.h" -#include "arrow/python/numpy_internal.h" -#include "arrow/python/pyarrow.h" -#include "arrow/python/python_to_arrow.h" -#include "arrow/python/type_traits.h" +#include "arrow_to_python_internal.h" +#include "common.h" +#include "datetime.h" +#include "decimal.h" +#include "helpers.h" +#include "numpy_convert.h" +#include "numpy_internal.h" +#include "pyarrow.h" +#include "python_to_arrow.h" +#include "type_traits.h" namespace arrow { diff --git a/cpp/src/arrow/python/arrow_to_pandas.h b/python/pyarrow/src/arrow_to_pandas.h similarity index 98% rename from cpp/src/arrow/python/arrow_to_pandas.h rename to python/pyarrow/src/arrow_to_pandas.h index 6570364b8d2a1..33c08b6fe8128 100644 --- a/cpp/src/arrow/python/arrow_to_pandas.h +++ b/python/pyarrow/src/arrow_to_pandas.h @@ -20,14 +20,14 @@ #pragma once -#include "arrow/python/platform.h" +#include "platform.h" #include #include #include #include "arrow/memory_pool.h" -#include "arrow/python/visibility.h" +#include "visibility.h" namespace arrow { diff --git a/cpp/src/arrow/python/arrow_to_python_internal.h b/python/pyarrow/src/arrow_to_python_internal.h similarity index 97% rename from cpp/src/arrow/python/arrow_to_python_internal.h rename to python/pyarrow/src/arrow_to_python_internal.h index 514cda3200123..251c2a38ca0cd 100644 --- a/cpp/src/arrow/python/arrow_to_python_internal.h +++ b/python/pyarrow/src/arrow_to_python_internal.h @@ -18,7 +18,7 @@ #pragma once #include "arrow/array.h" -#include "arrow/python/platform.h" +#include "platform.h" namespace arrow { namespace py { diff --git a/cpp/src/arrow/python/benchmark.cc b/python/pyarrow/src/benchmark.cc similarity index 94% rename from cpp/src/arrow/python/benchmark.cc rename to python/pyarrow/src/benchmark.cc index 2d29f69d25bdb..1e56552ed80b1 100644 --- a/cpp/src/arrow/python/benchmark.cc +++ b/python/pyarrow/src/benchmark.cc @@ 
-15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -#include -#include +#include "benchmark.h" +#include "helpers.h" namespace arrow { namespace py { diff --git a/cpp/src/arrow/python/benchmark.h b/python/pyarrow/src/benchmark.h similarity index 93% rename from cpp/src/arrow/python/benchmark.h rename to python/pyarrow/src/benchmark.h index 8060dd33722a0..883f02fecebb1 100644 --- a/cpp/src/arrow/python/benchmark.h +++ b/python/pyarrow/src/benchmark.h @@ -17,9 +17,9 @@ #pragma once -#include "arrow/python/platform.h" +#include "platform.h" -#include "arrow/python/visibility.h" +#include "visibility.h" namespace arrow { namespace py { diff --git a/cpp/src/arrow/python/common.cc b/python/pyarrow/src/common.cc similarity index 98% rename from cpp/src/arrow/python/common.cc rename to python/pyarrow/src/common.cc index 6fe2ed4dae321..09dde3e448987 100644 --- a/cpp/src/arrow/python/common.cc +++ b/python/pyarrow/src/common.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/python/common.h" +#include "common.h" #include #include @@ -26,7 +26,7 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/logging.h" -#include "arrow/python/helpers.h" +#include "helpers.h" namespace arrow { diff --git a/cpp/src/arrow/python/common.h b/python/pyarrow/src/common.h similarity index 99% rename from cpp/src/arrow/python/common.h rename to python/pyarrow/src/common.h index 5c16106730baa..768ff8dce440b 100644 --- a/cpp/src/arrow/python/common.h +++ b/python/pyarrow/src/common.h @@ -21,10 +21,10 @@ #include #include "arrow/buffer.h" -#include "arrow/python/pyarrow.h" -#include "arrow/python/visibility.h" #include "arrow/result.h" #include "arrow/util/macros.h" +#include "pyarrow.h" +#include "visibility.h" namespace arrow { diff --git a/cpp/src/arrow/python/csv.cc b/python/pyarrow/src/csv.cc similarity index 96% rename from cpp/src/arrow/python/csv.cc rename to python/pyarrow/src/csv.cc index d96c9400e2b64..61ff23a22ed89 100644 --- a/cpp/src/arrow/python/csv.cc +++ b/python/pyarrow/src/csv.cc @@ -15,11 +15,11 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/python/csv.h" +#include "csv.h" #include -#include "arrow/python/common.h" +#include "common.h" namespace arrow { diff --git a/cpp/src/arrow/python/csv.h b/python/pyarrow/src/csv.h similarity index 97% rename from cpp/src/arrow/python/csv.h rename to python/pyarrow/src/csv.h index 34302e9366739..e6e53af0f8582 100644 --- a/cpp/src/arrow/python/csv.h +++ b/python/pyarrow/src/csv.h @@ -23,8 +23,8 @@ #include #include "arrow/csv/options.h" -#include "arrow/python/common.h" #include "arrow/util/macros.h" +#include "common.h" namespace arrow { namespace py { diff --git a/cpp/src/arrow/python/datetime.cc b/python/pyarrow/src/datetime.cc similarity index 99% rename from cpp/src/arrow/python/datetime.cc rename to python/pyarrow/src/datetime.cc index 848b0a6bf102f..9604b529753a9 100644 --- a/cpp/src/arrow/python/datetime.cc +++ b/python/pyarrow/src/datetime.cc @@ -14,22 +14,22 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. 
-#include "arrow/python/datetime.h" +#include "datetime.h" #include #include #include #include "arrow/array.h" -#include "arrow/python/arrow_to_python_internal.h" -#include "arrow/python/common.h" -#include "arrow/python/helpers.h" -#include "arrow/python/platform.h" #include "arrow/scalar.h" #include "arrow/status.h" #include "arrow/type.h" #include "arrow/util/logging.h" #include "arrow/util/value_parsing.h" +#include "arrow_to_python_internal.h" +#include "common.h" +#include "helpers.h" +#include "platform.h" namespace arrow { namespace py { diff --git a/cpp/src/arrow/python/datetime.h b/python/pyarrow/src/datetime.h similarity index 99% rename from cpp/src/arrow/python/datetime.h rename to python/pyarrow/src/datetime.h index dd07710aaf6a4..6f9bfbe2dbfd2 100644 --- a/cpp/src/arrow/python/datetime.h +++ b/python/pyarrow/src/datetime.h @@ -20,8 +20,8 @@ #include #include -#include "arrow/python/platform.h" -#include "arrow/python/visibility.h" +#include "platform.h" +#include "visibility.h" #include "arrow/status.h" #include "arrow/type.h" #include "arrow/type_fwd.h" diff --git a/cpp/src/arrow/python/decimal.cc b/python/pyarrow/src/decimal.cc similarity index 98% rename from cpp/src/arrow/python/decimal.cc rename to python/pyarrow/src/decimal.cc index 0c00fcfaa8e59..a7244f9dcc110 100644 --- a/cpp/src/arrow/python/decimal.cc +++ b/python/pyarrow/src/decimal.cc @@ -18,9 +18,9 @@ #include #include -#include "arrow/python/common.h" -#include "arrow/python/decimal.h" -#include "arrow/python/helpers.h" +#include "common.h" +#include "decimal.h" +#include "helpers.h" #include "arrow/type_fwd.h" #include "arrow/util/decimal.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/python/decimal.h b/python/pyarrow/src/decimal.h similarity index 99% rename from cpp/src/arrow/python/decimal.h rename to python/pyarrow/src/decimal.h index 1187037aed29e..5c4a17dcd480e 100644 --- a/cpp/src/arrow/python/decimal.h +++ b/python/pyarrow/src/decimal.h @@ -19,7 +19,7 @@ #include -#include "arrow/python/visibility.h" +#include "visibility.h" #include "arrow/type.h" namespace arrow { diff --git a/cpp/src/arrow/python/deserialize.cc b/python/pyarrow/src/deserialize.cc similarity index 98% rename from cpp/src/arrow/python/deserialize.cc rename to python/pyarrow/src/deserialize.cc index 961a1686e0a89..ad28874460a46 100644 --- a/cpp/src/arrow/python/deserialize.cc +++ b/python/pyarrow/src/deserialize.cc @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/python/deserialize.h" +#include "deserialize.h" -#include "arrow/python/numpy_interop.h" +#include "numpy_interop.h" #include #include @@ -40,12 +40,12 @@ #include "arrow/util/logging.h" #include "arrow/util/value_parsing.h" -#include "arrow/python/common.h" -#include "arrow/python/datetime.h" -#include "arrow/python/helpers.h" -#include "arrow/python/numpy_convert.h" -#include "arrow/python/pyarrow.h" -#include "arrow/python/serialize.h" +#include "common.h" +#include "datetime.h" +#include "helpers.h" +#include "numpy_convert.h" +#include "pyarrow.h" +#include "serialize.h" namespace arrow { diff --git a/cpp/src/arrow/python/deserialize.h b/python/pyarrow/src/deserialize.h similarity index 98% rename from cpp/src/arrow/python/deserialize.h rename to python/pyarrow/src/deserialize.h index 41b6a13a38875..08d0972048d19 100644 --- a/cpp/src/arrow/python/deserialize.h +++ b/python/pyarrow/src/deserialize.h @@ -21,8 +21,8 @@ #include #include -#include "arrow/python/serialize.h" -#include "arrow/python/visibility.h" +#include "serialize.h" +#include "visibility.h" #include "arrow/status.h" namespace arrow { diff --git a/cpp/src/arrow/python/extension_type.cc b/python/pyarrow/src/extension_type.cc similarity index 98% rename from cpp/src/arrow/python/extension_type.cc rename to python/pyarrow/src/extension_type.cc index 3ccc171c8713a..9fd2f2e1466d1 100644 --- a/cpp/src/arrow/python/extension_type.cc +++ b/python/pyarrow/src/extension_type.cc @@ -19,9 +19,9 @@ #include #include -#include "arrow/python/extension_type.h" -#include "arrow/python/helpers.h" -#include "arrow/python/pyarrow.h" +#include "extension_type.h" +#include "helpers.h" +#include "pyarrow.h" #include "arrow/util/checked_cast.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/python/extension_type.h b/python/pyarrow/src/extension_type.h similarity index 97% rename from cpp/src/arrow/python/extension_type.h rename to python/pyarrow/src/extension_type.h index e433d9aca7081..76ddb32773389 100644 --- a/cpp/src/arrow/python/extension_type.h +++ b/python/pyarrow/src/extension_type.h @@ -21,8 +21,8 @@ #include #include "arrow/extension_type.h" -#include "arrow/python/common.h" -#include "arrow/python/visibility.h" +#include "common.h" +#include "visibility.h" #include "arrow/util/macros.h" namespace arrow { diff --git a/cpp/src/arrow/python/filesystem.cc b/python/pyarrow/src/filesystem.cc similarity index 99% rename from cpp/src/arrow/python/filesystem.cc rename to python/pyarrow/src/filesystem.cc index 5e9b500a4f7b4..17ca732e073db 100644 --- a/cpp/src/arrow/python/filesystem.cc +++ b/python/pyarrow/src/filesystem.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/python/filesystem.h" +#include "filesystem.h" #include "arrow/util/logging.h" namespace arrow { diff --git a/cpp/src/arrow/python/filesystem.h b/python/pyarrow/src/filesystem.h similarity index 98% rename from cpp/src/arrow/python/filesystem.h rename to python/pyarrow/src/filesystem.h index 003fd5cb80551..993145b532797 100644 --- a/cpp/src/arrow/python/filesystem.h +++ b/python/pyarrow/src/filesystem.h @@ -22,8 +22,8 @@ #include #include "arrow/filesystem/filesystem.h" -#include "arrow/python/common.h" -#include "arrow/python/visibility.h" +#include "common.h" +#include "visibility.h" #include "arrow/util/macros.h" namespace arrow { diff --git a/cpp/src/arrow/python/flight.cc b/python/pyarrow/src/flight.cc similarity index 98% rename from cpp/src/arrow/python/flight.cc rename to python/pyarrow/src/flight.cc index 9077bbe4acb7d..79b8db074a5e1 100644 --- a/cpp/src/arrow/python/flight.cc +++ b/python/pyarrow/src/flight.cc @@ -18,7 +18,7 @@ #include #include -#include "arrow/python/flight.h" +#include "flight.h" #include "arrow/util/io_util.h" #include "arrow/util/logging.h" @@ -380,10 +380,7 @@ Status CreateFlightInfo(const std::shared_ptr& schema, Status CreateSchemaResult(const std::shared_ptr& schema, std::unique_ptr* out) { - ARROW_ASSIGN_OR_RAISE(auto result, arrow::flight::SchemaResult::Make(*schema)); - *out = std::unique_ptr( - new arrow::flight::SchemaResult(std::move(result))); - return Status::OK(); + return arrow::flight::SchemaResult::Make(*schema).Value(out); } } // namespace flight diff --git a/cpp/src/arrow/python/flight.h b/python/pyarrow/src/flight.h similarity index 99% rename from cpp/src/arrow/python/flight.h rename to python/pyarrow/src/flight.h index 5713b2e4b7006..7e0136fcc3e94 100644 --- a/cpp/src/arrow/python/flight.h +++ b/python/pyarrow/src/flight.h @@ -23,7 +23,7 @@ #include "arrow/flight/api.h" #include "arrow/ipc/dictionary.h" -#include "arrow/python/common.h" +#include "common.h" #if defined(_WIN32) || defined(__CYGWIN__) // Windows #if defined(_MSC_VER) diff --git a/cpp/src/arrow/python/gdb.cc b/python/pyarrow/src/gdb.cc similarity index 99% rename from cpp/src/arrow/python/gdb.cc rename to python/pyarrow/src/gdb.cc index 944e1e96d7160..297bc6dbffc71 100644 --- a/cpp/src/arrow/python/gdb.cc +++ b/python/pyarrow/src/gdb.cc @@ -24,7 +24,7 @@ #include "arrow/datum.h" #include "arrow/extension_type.h" #include "arrow/ipc/json_simple.h" -#include "arrow/python/gdb.h" +#include "gdb.h" #include "arrow/record_batch.h" #include "arrow/scalar.h" #include "arrow/table.h" diff --git a/cpp/src/arrow/python/gdb.h b/python/pyarrow/src/gdb.h similarity index 96% rename from cpp/src/arrow/python/gdb.h rename to python/pyarrow/src/gdb.h index 1ddcbb51f6e0b..b4296abe6ddcd 100644 --- a/cpp/src/arrow/python/gdb.h +++ b/python/pyarrow/src/gdb.h @@ -17,7 +17,7 @@ #pragma once -#include "arrow/python/visibility.h" +#include "visibility.h" namespace arrow { namespace gdb { diff --git a/cpp/src/arrow/python/helpers.cc b/python/pyarrow/src/helpers.cc similarity index 99% rename from cpp/src/arrow/python/helpers.cc rename to python/pyarrow/src/helpers.cc index c266abc169d49..73d7cd8dcfc6e 100644 --- a/cpp/src/arrow/python/helpers.cc +++ b/python/pyarrow/src/helpers.cc @@ -16,17 +16,17 @@ // under the License. 
// helpers.h includes a NumPy header, so we include this first -#include "arrow/python/numpy_interop.h" +#include "numpy_interop.h" -#include "arrow/python/helpers.h" +#include "helpers.h" #include #include #include #include -#include "arrow/python/common.h" -#include "arrow/python/decimal.h" +#include "common.h" +#include "decimal.h" #include "arrow/type_fwd.h" #include "arrow/util/checked_cast.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/python/helpers.h b/python/pyarrow/src/helpers.h similarity index 97% rename from cpp/src/arrow/python/helpers.h rename to python/pyarrow/src/helpers.h index a8e5f80b60678..089d1225dd6ac 100644 --- a/cpp/src/arrow/python/helpers.h +++ b/python/pyarrow/src/helpers.h @@ -17,18 +17,18 @@ #pragma once -#include "arrow/python/platform.h" +#include "platform.h" #include #include #include #include -#include "arrow/python/numpy_interop.h" +#include "numpy_interop.h" #include -#include "arrow/python/visibility.h" +#include "visibility.h" #include "arrow/type.h" #include "arrow/util/macros.h" diff --git a/cpp/src/arrow/python/inference.cc b/python/pyarrow/src/inference.cc similarity index 98% rename from cpp/src/arrow/python/inference.cc rename to python/pyarrow/src/inference.cc index db5f0896a95bc..513b0bfdbbbc0 100644 --- a/cpp/src/arrow/python/inference.cc +++ b/python/pyarrow/src/inference.cc @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/python/inference.h" -#include "arrow/python/numpy_interop.h" +#include "inference.h" +#include "numpy_interop.h" #include @@ -31,11 +31,11 @@ #include "arrow/util/decimal.h" #include "arrow/util/logging.h" -#include "arrow/python/datetime.h" -#include "arrow/python/decimal.h" -#include "arrow/python/helpers.h" -#include "arrow/python/iterators.h" -#include "arrow/python/numpy_convert.h" +#include "datetime.h" +#include "decimal.h" +#include "helpers.h" +#include "iterators.h" +#include "numpy_convert.h" namespace arrow { namespace py { diff --git a/cpp/src/arrow/python/inference.h b/python/pyarrow/src/inference.h similarity index 94% rename from cpp/src/arrow/python/inference.h rename to python/pyarrow/src/inference.h index eff1836293430..24005dd96f15f 100644 --- a/cpp/src/arrow/python/inference.h +++ b/python/pyarrow/src/inference.h @@ -20,15 +20,15 @@ #pragma once -#include "arrow/python/platform.h" +#include "platform.h" #include -#include "arrow/python/visibility.h" +#include "visibility.h" #include "arrow/type.h" #include "arrow/util/macros.h" -#include "arrow/python/common.h" +#include "common.h" namespace arrow { diff --git a/cpp/src/arrow/python/init.cc b/python/pyarrow/src/init.cc similarity index 93% rename from cpp/src/arrow/python/init.cc rename to python/pyarrow/src/init.cc index dba293bbe2366..f09c5cd12a5a4 100644 --- a/cpp/src/arrow/python/init.cc +++ b/python/pyarrow/src/init.cc @@ -18,7 +18,7 @@ // Trigger the array import (inversion of NO_IMPORT_ARRAY) #define NUMPY_IMPORT_ARRAY -#include "arrow/python/init.h" -#include "arrow/python/numpy_interop.h" +#include "init.h" +#include "numpy_interop.h" int arrow_init_numpy() { return arrow::py::import_numpy(); } diff --git a/cpp/src/arrow/python/init.h b/python/pyarrow/src/init.h similarity index 92% rename from cpp/src/arrow/python/init.h rename to python/pyarrow/src/init.h index 2e6c954862bd9..eab467b631e73 100644 --- a/cpp/src/arrow/python/init.h +++ b/python/pyarrow/src/init.h @@ -17,8 +17,8 @@ #pragma once -#include "arrow/python/platform.h" -#include 
"arrow/python/visibility.h" +#include "platform.h" +#include "visibility.h" extern "C" { ARROW_PYTHON_EXPORT diff --git a/cpp/src/arrow/python/io.cc b/python/pyarrow/src/io.cc similarity index 99% rename from cpp/src/arrow/python/io.cc rename to python/pyarrow/src/io.cc index 73525feed3848..173d84ff56753 100644 --- a/cpp/src/arrow/python/io.cc +++ b/python/pyarrow/src/io.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/python/io.h" +#include "io.h" #include #include @@ -28,8 +28,8 @@ #include "arrow/status.h" #include "arrow/util/logging.h" -#include "arrow/python/common.h" -#include "arrow/python/pyarrow.h" +#include "common.h" +#include "pyarrow.h" namespace arrow { diff --git a/cpp/src/arrow/python/io.h b/python/pyarrow/src/io.h similarity index 98% rename from cpp/src/arrow/python/io.h rename to python/pyarrow/src/io.h index a38d0ca332c97..53b15434ea67d 100644 --- a/cpp/src/arrow/python/io.h +++ b/python/pyarrow/src/io.h @@ -22,8 +22,8 @@ #include "arrow/io/interfaces.h" #include "arrow/io/transform.h" -#include "arrow/python/common.h" -#include "arrow/python/visibility.h" +#include "common.h" +#include "visibility.h" namespace arrow { namespace py { diff --git a/cpp/src/arrow/python/ipc.cc b/python/pyarrow/src/ipc.cc similarity index 96% rename from cpp/src/arrow/python/ipc.cc rename to python/pyarrow/src/ipc.cc index 2e6c9d912756a..bed3da2d1ac5a 100644 --- a/cpp/src/arrow/python/ipc.cc +++ b/python/pyarrow/src/ipc.cc @@ -15,11 +15,11 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/python/ipc.h" +#include "ipc.h" #include -#include "arrow/python/pyarrow.h" +#include "pyarrow.h" namespace arrow { namespace py { diff --git a/cpp/src/arrow/python/ipc.h b/python/pyarrow/src/ipc.h similarity index 95% rename from cpp/src/arrow/python/ipc.h rename to python/pyarrow/src/ipc.h index 92232ed830093..38839af82fd81 100644 --- a/cpp/src/arrow/python/ipc.h +++ b/python/pyarrow/src/ipc.h @@ -19,8 +19,8 @@ #include -#include "arrow/python/common.h" -#include "arrow/python/visibility.h" +#include "common.h" +#include "visibility.h" #include "arrow/record_batch.h" #include "arrow/result.h" #include "arrow/util/macros.h" diff --git a/cpp/src/arrow/python/iterators.h b/python/pyarrow/src/iterators.h similarity index 98% rename from cpp/src/arrow/python/iterators.h rename to python/pyarrow/src/iterators.h index 7b31962dac5b8..d581adf52c96a 100644 --- a/cpp/src/arrow/python/iterators.h +++ b/python/pyarrow/src/iterators.h @@ -21,8 +21,8 @@ #include "arrow/array/array_primitive.h" -#include "arrow/python/common.h" -#include "arrow/python/numpy_internal.h" +#include "common.h" +#include "numpy_internal.h" namespace arrow { namespace py { diff --git a/cpp/src/arrow/python/numpy_convert.cc b/python/pyarrow/src/numpy_convert.cc similarity index 99% rename from cpp/src/arrow/python/numpy_convert.cc rename to python/pyarrow/src/numpy_convert.cc index 49706807644d2..d8e10605daac1 100644 --- a/cpp/src/arrow/python/numpy_convert.cc +++ b/python/pyarrow/src/numpy_convert.cc @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/python/numpy_interop.h" +#include "numpy_interop.h" -#include "arrow/python/numpy_convert.h" +#include "numpy_convert.h" #include #include @@ -30,9 +30,9 @@ #include "arrow/type.h" #include "arrow/util/logging.h" -#include "arrow/python/common.h" -#include "arrow/python/pyarrow.h" -#include "arrow/python/type_traits.h" +#include "common.h" +#include "pyarrow.h" +#include "type_traits.h" namespace arrow { namespace py { diff --git a/cpp/src/arrow/python/numpy_convert.h b/python/pyarrow/src/numpy_convert.h similarity index 98% rename from cpp/src/arrow/python/numpy_convert.h rename to python/pyarrow/src/numpy_convert.h index 10451077a221d..d2772dcdcdf23 100644 --- a/cpp/src/arrow/python/numpy_convert.h +++ b/python/pyarrow/src/numpy_convert.h @@ -20,14 +20,14 @@ #pragma once -#include "arrow/python/platform.h" +#include "platform.h" #include #include #include #include "arrow/buffer.h" -#include "arrow/python/visibility.h" +#include "visibility.h" #include "arrow/sparse_tensor.h" namespace arrow { diff --git a/cpp/src/arrow/python/numpy_internal.h b/python/pyarrow/src/numpy_internal.h similarity index 98% rename from cpp/src/arrow/python/numpy_internal.h rename to python/pyarrow/src/numpy_internal.h index b9b632f9f9a12..d408e908442b8 100644 --- a/cpp/src/arrow/python/numpy_internal.h +++ b/python/pyarrow/src/numpy_internal.h @@ -19,11 +19,11 @@ #pragma once -#include "arrow/python/numpy_interop.h" +#include "numpy_interop.h" #include "arrow/status.h" -#include "arrow/python/platform.h" +#include "platform.h" #include #include diff --git a/cpp/src/arrow/python/numpy_interop.h b/python/pyarrow/src/numpy_interop.h similarity index 98% rename from cpp/src/arrow/python/numpy_interop.h rename to python/pyarrow/src/numpy_interop.h index ce7baed259f91..d212e014ec6f9 100644 --- a/cpp/src/arrow/python/numpy_interop.h +++ b/python/pyarrow/src/numpy_interop.h @@ -17,7 +17,7 @@ #pragma once -#include "arrow/python/platform.h" // IWYU pragma: export +#include "platform.h" // IWYU pragma: export #include // IWYU pragma: export diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/python/pyarrow/src/numpy_to_arrow.cc similarity index 98% rename from cpp/src/arrow/python/numpy_to_arrow.cc rename to python/pyarrow/src/numpy_to_arrow.cc index 2727ce32f4494..cd01577d71dcb 100644 --- a/cpp/src/arrow/python/numpy_to_arrow.cc +++ b/python/pyarrow/src/numpy_to_arrow.cc @@ -17,8 +17,8 @@ // Functions for pandas conversion via NumPy -#include "arrow/python/numpy_to_arrow.h" -#include "arrow/python/numpy_interop.h" +#include "numpy_to_arrow.h" +#include "numpy_interop.h" #include #include @@ -49,14 +49,14 @@ #include "arrow/compute/api_scalar.h" -#include "arrow/python/common.h" -#include "arrow/python/datetime.h" -#include "arrow/python/helpers.h" -#include "arrow/python/iterators.h" -#include "arrow/python/numpy_convert.h" -#include "arrow/python/numpy_internal.h" -#include "arrow/python/python_to_arrow.h" -#include "arrow/python/type_traits.h" +#include "common.h" +#include "datetime.h" +#include "helpers.h" +#include "iterators.h" +#include "numpy_convert.h" +#include "numpy_internal.h" +#include "python_to_arrow.h" +#include "type_traits.h" namespace arrow { diff --git a/cpp/src/arrow/python/numpy_to_arrow.h b/python/pyarrow/src/numpy_to_arrow.h similarity index 97% rename from cpp/src/arrow/python/numpy_to_arrow.h rename to python/pyarrow/src/numpy_to_arrow.h index b6cd093e55420..13924b1ac1ca6 100644 --- a/cpp/src/arrow/python/numpy_to_arrow.h +++ b/python/pyarrow/src/numpy_to_arrow.h @@ -19,12 
+19,12 @@ #pragma once -#include "arrow/python/platform.h" +#include "platform.h" #include #include "arrow/compute/api.h" -#include "arrow/python/visibility.h" +#include "visibility.h" namespace arrow { diff --git a/cpp/src/arrow/python/parquet_encryption.cc b/python/pyarrow/src/parquet_encryption.cc similarity index 98% rename from cpp/src/arrow/python/parquet_encryption.cc rename to python/pyarrow/src/parquet_encryption.cc index a5f924bce783e..b64c77f2b9fc6 100644 --- a/cpp/src/arrow/python/parquet_encryption.cc +++ b/python/pyarrow/src/parquet_encryption.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/python/parquet_encryption.h" +#include "parquet_encryption.h" #include "parquet/exception.h" namespace arrow { diff --git a/cpp/src/arrow/python/parquet_encryption.h b/python/pyarrow/src/parquet_encryption.h similarity index 98% rename from cpp/src/arrow/python/parquet_encryption.h rename to python/pyarrow/src/parquet_encryption.h index 23ee478348ecd..0079b6eed5694 100644 --- a/cpp/src/arrow/python/parquet_encryption.h +++ b/python/pyarrow/src/parquet_encryption.h @@ -19,8 +19,8 @@ #include -#include "arrow/python/common.h" -#include "arrow/python/visibility.h" +#include "common.h" +#include "visibility.h" #include "arrow/util/macros.h" #include "parquet/encryption/crypto_factory.h" #include "parquet/encryption/kms_client.h" diff --git a/cpp/src/arrow/python/pch.h b/python/pyarrow/src/pch.h similarity index 96% rename from cpp/src/arrow/python/pch.h rename to python/pyarrow/src/pch.h index d1d688b4f17c4..0afcf7938dfce 100644 --- a/cpp/src/arrow/python/pch.h +++ b/python/pyarrow/src/pch.h @@ -21,4 +21,4 @@ // may incur a slowdown, since it makes the precompiled header heavier to load. #include "arrow/pch.h" -#include "arrow/python/platform.h" +#include "platform.h" diff --git a/cpp/src/arrow/python/platform.h b/python/pyarrow/src/platform.h similarity index 100% rename from cpp/src/arrow/python/platform.h rename to python/pyarrow/src/platform.h diff --git a/cpp/src/arrow/python/pyarrow.cc b/python/pyarrow/src/pyarrow.cc similarity index 95% rename from cpp/src/arrow/python/pyarrow.cc rename to python/pyarrow/src/pyarrow.cc index c3244b74bf512..b567a68df3e24 100644 --- a/cpp/src/arrow/python/pyarrow.cc +++ b/python/pyarrow/src/pyarrow.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/python/pyarrow.h" +#include "pyarrow.h" #include #include @@ -25,10 +25,10 @@ #include "arrow/tensor.h" #include "arrow/type.h" -#include "arrow/python/common.h" -#include "arrow/python/datetime.h" +#include "common.h" +#include "datetime.h" namespace { -#include "arrow/python/pyarrow_api.h" +#include "pyarrow_api.h" } namespace arrow { diff --git a/cpp/src/arrow/python/pyarrow.h b/python/pyarrow/src/pyarrow.h similarity index 97% rename from cpp/src/arrow/python/pyarrow.h rename to python/pyarrow/src/pyarrow.h index 4c365081d70ca..a63e29a33fb74 100644 --- a/cpp/src/arrow/python/pyarrow.h +++ b/python/pyarrow/src/pyarrow.h @@ -17,11 +17,11 @@ #pragma once -#include "arrow/python/platform.h" +#include "platform.h" #include -#include "arrow/python/visibility.h" +#include "visibility.h" #include "arrow/sparse_tensor.h" diff --git a/cpp/src/arrow/python/pyarrow_api.h b/python/pyarrow/src/pyarrow_api.h similarity index 100% rename from cpp/src/arrow/python/pyarrow_api.h rename to python/pyarrow/src/pyarrow_api.h diff --git a/cpp/src/arrow/python/pyarrow_lib.h b/python/pyarrow/src/pyarrow_lib.h similarity index 100% rename from cpp/src/arrow/python/pyarrow_lib.h rename to python/pyarrow/src/pyarrow_lib.h diff --git a/cpp/src/arrow/python/python_test.cc b/python/pyarrow/src/python_test.cc similarity index 98% rename from cpp/src/arrow/python/python_test.cc rename to python/pyarrow/src/python_test.cc index c465fabc68081..54086faa7ca76 100644 --- a/cpp/src/arrow/python/python_test.cc +++ b/python/pyarrow/src/python_test.cc @@ -21,7 +21,7 @@ #include #include -#include "arrow/python/platform.h" +#include "platform.h" #include "arrow/array.h" #include "arrow/array/builder_binary.h" @@ -30,12 +30,12 @@ #include "arrow/util/decimal.h" #include "arrow/util/optional.h" -#include "arrow/python/arrow_to_pandas.h" -#include "arrow/python/decimal.h" -#include "arrow/python/helpers.h" -#include "arrow/python/numpy_convert.h" -#include "arrow/python/numpy_interop.h" -#include "arrow/python/python_to_arrow.h" +#include "arrow_to_pandas.h" +#include "decimal.h" +#include "helpers.h" +#include "numpy_convert.h" +#include "numpy_interop.h" +#include "python_to_arrow.h" #include "arrow/util/checked_cast.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/python_to_arrow.cc similarity index 99% rename from cpp/src/arrow/python/python_to_arrow.cc rename to python/pyarrow/src/python_to_arrow.cc index 024c9c575c311..4ca19049a7382 100644 --- a/cpp/src/arrow/python/python_to_arrow.cc +++ b/python/pyarrow/src/python_to_arrow.cc @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/python/python_to_arrow.h" -#include "arrow/python/numpy_interop.h" +#include "python_to_arrow.h" +#include "numpy_interop.h" #include @@ -44,13 +44,13 @@ #include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" -#include "arrow/python/datetime.h" -#include "arrow/python/decimal.h" -#include "arrow/python/helpers.h" -#include "arrow/python/inference.h" -#include "arrow/python/iterators.h" -#include "arrow/python/numpy_convert.h" -#include "arrow/python/type_traits.h" +#include "datetime.h" +#include "decimal.h" +#include "helpers.h" +#include "inference.h" +#include "iterators.h" +#include "numpy_convert.h" +#include "type_traits.h" #include "arrow/visit_type_inline.h" namespace arrow { @@ -1114,13 +1114,17 @@ Status ConvertToSequenceAndInferSize(PyObject* obj, PyObject** seq, int64_t* siz RETURN_IF_PYERROR(); for (i = 0; i < n; i++) { PyObject* item = PyIter_Next(iter); - if (!item) break; + if (!item) { + // either an error occurred or the iterator ended + RETURN_IF_PYERROR(); + break; + } PyList_SET_ITEM(lst, i, item); } // Shrink list if len(iterator) < size if (i < n && PyList_SetSlice(lst, i, n, NULL)) { Py_DECREF(lst); - return Status::UnknownError("failed to resize list"); + RETURN_IF_PYERROR(); } *seq = lst; *size = std::min(i, *size); diff --git a/cpp/src/arrow/python/python_to_arrow.h b/python/pyarrow/src/python_to_arrow.h similarity index 95% rename from cpp/src/arrow/python/python_to_arrow.h rename to python/pyarrow/src/python_to_arrow.h index d167996ba8da6..ca246a87401ee 100644 --- a/cpp/src/arrow/python/python_to_arrow.h +++ b/python/pyarrow/src/python_to_arrow.h @@ -20,16 +20,16 @@ #pragma once -#include "arrow/python/platform.h" +#include "platform.h" #include #include -#include "arrow/python/visibility.h" +#include "visibility.h" #include "arrow/type.h" #include "arrow/util/macros.h" -#include "arrow/python/common.h" +#include "common.h" namespace arrow { diff --git a/cpp/src/arrow/python/serialize.cc b/python/pyarrow/src/serialize.cc similarity index 99% rename from cpp/src/arrow/python/serialize.cc rename to python/pyarrow/src/serialize.cc index ad079cbd9c704..c7c925fa19806 100644 --- a/cpp/src/arrow/python/serialize.cc +++ b/python/pyarrow/src/serialize.cc @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/python/serialize.h" -#include "arrow/python/numpy_interop.h" +#include "serialize.h" +#include "numpy_interop.h" #include #include @@ -42,13 +42,13 @@ #include "arrow/tensor.h" #include "arrow/util/logging.h" -#include "arrow/python/common.h" -#include "arrow/python/datetime.h" -#include "arrow/python/helpers.h" -#include "arrow/python/iterators.h" -#include "arrow/python/numpy_convert.h" -#include "arrow/python/platform.h" -#include "arrow/python/pyarrow.h" +#include "common.h" +#include "datetime.h" +#include "helpers.h" +#include "iterators.h" +#include "numpy_convert.h" +#include "platform.h" +#include "pyarrow.h" constexpr int32_t kMaxRecursionDepth = 100; diff --git a/cpp/src/arrow/python/serialize.h b/python/pyarrow/src/serialize.h similarity index 99% rename from cpp/src/arrow/python/serialize.h rename to python/pyarrow/src/serialize.h index fd207d3e06903..fd78c9a309ad3 100644 --- a/cpp/src/arrow/python/serialize.h +++ b/python/pyarrow/src/serialize.h @@ -21,7 +21,7 @@ #include #include "arrow/ipc/options.h" -#include "arrow/python/visibility.h" +#include "visibility.h" #include "arrow/sparse_tensor.h" #include "arrow/status.h" diff --git a/cpp/src/arrow/python/type_traits.h b/python/pyarrow/src/type_traits.h similarity index 99% rename from cpp/src/arrow/python/type_traits.h rename to python/pyarrow/src/type_traits.h index a941577f76558..4cdfe9d8d62b6 100644 --- a/cpp/src/arrow/python/type_traits.h +++ b/python/pyarrow/src/type_traits.h @@ -19,12 +19,12 @@ #pragma once -#include "arrow/python/platform.h" +#include "platform.h" #include #include -#include "arrow/python/numpy_interop.h" +#include "numpy_interop.h" #include diff --git a/cpp/src/arrow/python/udf.cc b/python/pyarrow/src/udf.cc similarity index 98% rename from cpp/src/arrow/python/udf.cc rename to python/pyarrow/src/udf.cc index 81bf47c0ade00..51cea5e6c64a6 100644 --- a/cpp/src/arrow/python/udf.cc +++ b/python/pyarrow/src/udf.cc @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/python/udf.h" +#include "udf.h" #include "arrow/compute/function.h" -#include "arrow/python/common.h" +#include "common.h" namespace arrow { diff --git a/cpp/src/arrow/python/udf.h b/python/pyarrow/src/udf.h similarity index 93% rename from cpp/src/arrow/python/udf.h rename to python/pyarrow/src/udf.h index 4ab3e7cc72b27..52f22b4cb4f3e 100644 --- a/cpp/src/arrow/python/udf.h +++ b/python/pyarrow/src/udf.h @@ -20,11 +20,11 @@ #include "arrow/compute/exec.h" #include "arrow/compute/function.h" #include "arrow/compute/registry.h" -#include "arrow/python/platform.h" +#include "platform.h" -#include "arrow/python/common.h" -#include "arrow/python/pyarrow.h" -#include "arrow/python/visibility.h" +#include "common.h" +#include "pyarrow.h" +#include "visibility.h" namespace arrow { diff --git a/cpp/src/arrow/python/util/CMakeLists.txt b/python/pyarrow/src/util/CMakeLists.txt similarity index 100% rename from cpp/src/arrow/python/util/CMakeLists.txt rename to python/pyarrow/src/util/CMakeLists.txt diff --git a/cpp/src/arrow/python/util/test_main.cc b/python/pyarrow/src/util/test_main.cc similarity index 89% rename from cpp/src/arrow/python/util/test_main.cc rename to python/pyarrow/src/util/test_main.cc index dd7f379bdd4cc..3ee1657e6440b 100644 --- a/cpp/src/arrow/python/util/test_main.cc +++ b/python/pyarrow/src/util/test_main.cc @@ -15,13 +15,13 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/python/platform.h" +#include "../platform.h" #include -#include "arrow/python/datetime.h" -#include "arrow/python/init.h" -#include "arrow/python/pyarrow.h" +#include "../datetime.h" +#include "../init.h" +#include "../pyarrow.h" int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); diff --git a/cpp/src/arrow/python/visibility.h b/python/pyarrow/src/visibility.h similarity index 100% rename from cpp/src/arrow/python/visibility.h rename to python/pyarrow/src/visibility.h diff --git a/python/pyarrow/substrait.py b/python/pyarrow/substrait.py index e3ff28f4ebaea..590d03521fe50 100644 --- a/python/pyarrow/substrait.py +++ b/python/pyarrow/substrait.py @@ -16,5 +16,6 @@ # under the License. from pyarrow._substrait import ( # noqa + get_supported_functions, run_query, ) diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 17f88aca4eb0f..931677f9848ca 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -697,7 +697,10 @@ cdef class ChunkedArray(_PandasConvertible): 100 ] """ - return concat_arrays(self.chunks) + if self.num_chunks == 0: + return array([], type=self.type) + else: + return concat_arrays(self.chunks) def unique(self): """ @@ -1300,7 +1303,8 @@ def chunked_array(arrays, type=None): cdef: Array arr vector[shared_ptr[CArray]] c_arrays - shared_ptr[CChunkedArray] sp_chunked_array + shared_ptr[CChunkedArray] c_result + shared_ptr[CDataType] c_type type = ensure_type(type, allow_none=True) @@ -1315,25 +1319,13 @@ def chunked_array(arrays, type=None): # subsequent arrays to the firstly inferred array type # it also spares the inference overhead after the first chunk type = arr.type - else: - if arr.type != type: - raise TypeError( - "All array chunks must have type {}".format(type) - ) c_arrays.push_back(arr.sp_array) - if c_arrays.size() == 0 and type is None: - raise ValueError("When passing an empty collection of arrays " - "you must also pass the data type") - - sp_chunked_array.reset( - new CChunkedArray(c_arrays, pyarrow_unwrap_data_type(type)) - ) + c_type = pyarrow_unwrap_data_type(type) with nogil: - check_status(sp_chunked_array.get().Validate()) - - return pyarrow_wrap_chunked_array(sp_chunked_array) + c_result = GetResultValue(CChunkedArray.Make(c_arrays, c_type)) + return pyarrow_wrap_chunked_array(c_result) cdef _schema_from_arrays(arrays, names, metadata, shared_ptr[CSchema]* schema): @@ -2010,7 +2002,7 @@ cdef class RecordBatch(_PandasConvertible): >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], ... names=["n_legs", "animals"]) >>> batch.serialize() - + """ cdef shared_ptr[CBuffer] buffer cdef CIpcWriteOptions options = CIpcWriteOptions.Defaults() diff --git a/python/pyarrow/tensorflow/plasma_op.cc b/python/pyarrow/tensorflow/plasma_op.cc index bf4eec7891235..6445ba75e5e60 100644 --- a/python/pyarrow/tensorflow/plasma_op.cc +++ b/python/pyarrow/tensorflow/plasma_op.cc @@ -36,8 +36,8 @@ #include "arrow/util/logging.h" // These headers do not include Python.h -#include "arrow/python/deserialize.h" -#include "arrow/python/serialize.h" +#include "deserialize.h" +#include "serialize.h" #include "plasma/client.h" diff --git a/python/pyarrow/tests/parquet/encryption.py b/python/pyarrow/tests/parquet/encryption.py index b0465ec6ec899..d07f8ae273520 100644 --- a/python/pyarrow/tests/parquet/encryption.py +++ b/python/pyarrow/tests/parquet/encryption.py @@ -20,7 +20,8 @@ class InMemoryKmsClient(pe.KmsClient): - """This is a mock class implementation of KmsClient, built for testing only. 
+ """This is a mock class implementation of KmsClient, built for testing + only. """ def __init__(self, config): diff --git a/python/pyarrow/tests/parquet/test_encryption.py b/python/pyarrow/tests/parquet/test_encryption.py index 924339797f0b9..db9bc52e4698b 100644 --- a/python/pyarrow/tests/parquet/test_encryption.py +++ b/python/pyarrow/tests/parquet/test_encryption.py @@ -109,7 +109,7 @@ def write_encrypted_parquet(path, table, encryption_config, kms_connection_config, crypto_factory): file_encryption_properties = crypto_factory.file_encryption_properties( kms_connection_config, encryption_config) - assert(file_encryption_properties is not None) + assert file_encryption_properties is not None with pq.ParquetWriter( path, table.schema, encryption_properties=file_encryption_properties) as writer: @@ -120,13 +120,13 @@ def read_encrypted_parquet(path, decryption_config, kms_connection_config, crypto_factory): file_decryption_properties = crypto_factory.file_decryption_properties( kms_connection_config, decryption_config) - assert(file_decryption_properties is not None) + assert file_decryption_properties is not None meta = pq.read_metadata( path, decryption_properties=file_decryption_properties) - assert(meta.num_columns == 3) + assert meta.num_columns == 3 schema = pq.read_schema( path, decryption_properties=file_decryption_properties) - assert(len(schema.names) == 3) + assert len(schema.names) == 3 result = pq.ParquetFile( path, decryption_properties=file_decryption_properties) @@ -350,14 +350,14 @@ def kms_factory(kms_connection_configuration): def test_encrypted_parquet_encryption_configuration(): def validate_encryption_configuration(encryption_config): - assert(FOOTER_KEY_NAME == encryption_config.footer_key) - assert(["a", "b"] == encryption_config.column_keys[COL_KEY_NAME]) - assert("AES_GCM_CTR_V1" == encryption_config.encryption_algorithm) - assert(encryption_config.plaintext_footer) - assert(not encryption_config.double_wrapping) - assert(timedelta(minutes=10.0) == encryption_config.cache_lifetime) - assert(not encryption_config.internal_key_material) - assert(192 == encryption_config.data_key_length_bits) + assert FOOTER_KEY_NAME == encryption_config.footer_key + assert ["a", "b"] == encryption_config.column_keys[COL_KEY_NAME] + assert "AES_GCM_CTR_V1" == encryption_config.encryption_algorithm + assert encryption_config.plaintext_footer + assert not encryption_config.double_wrapping + assert timedelta(minutes=10.0) == encryption_config.cache_lifetime + assert not encryption_config.internal_key_material + assert 192 == encryption_config.data_key_length_bits encryption_config = pe.EncryptionConfiguration( footer_key=FOOTER_KEY_NAME, @@ -386,20 +386,20 @@ def validate_encryption_configuration(encryption_config): def test_encrypted_parquet_decryption_configuration(): decryption_config = pe.DecryptionConfiguration( cache_lifetime=timedelta(minutes=10.0)) - assert(timedelta(minutes=10.0) == decryption_config.cache_lifetime) + assert timedelta(minutes=10.0) == decryption_config.cache_lifetime decryption_config_1 = pe.DecryptionConfiguration() decryption_config_1.cache_lifetime = timedelta(minutes=10.0) - assert(timedelta(minutes=10.0) == decryption_config_1.cache_lifetime) + assert timedelta(minutes=10.0) == decryption_config_1.cache_lifetime def test_encrypted_parquet_kms_configuration(): def validate_kms_connection_config(kms_connection_config): - assert("Instance1" == kms_connection_config.kms_instance_id) - assert("URL1" == kms_connection_config.kms_instance_url) - 
assert("MyToken" == kms_connection_config.key_access_token) - assert({"key1": "key_material_1", "key2": "key_material_2"} == - kms_connection_config.custom_kms_conf) + assert "Instance1" == kms_connection_config.kms_instance_id + assert "URL1" == kms_connection_config.kms_instance_url + assert "MyToken" == kms_connection_config.key_access_token + assert ({"key1": "key_material_1", "key2": "key_material_2"} == + kms_connection_config.custom_kms_conf) kms_connection_config = pe.KmsConnectionConfig( kms_instance_id="Instance1", @@ -524,7 +524,7 @@ def kms_factory(kms_connection_configuration): # Read with decryption properties file_decryption_properties = crypto_factory.file_decryption_properties( kms_connection_config, decryption_config) - assert(file_decryption_properties is not None) + assert file_decryption_properties is not None result = pq.ParquetFile( path, decryption_properties=file_decryption_properties) diff --git a/python/pyarrow/tests/parquet/test_metadata.py b/python/pyarrow/tests/parquet/test_metadata.py index b36ea60658dd6..e4c9a757fc09f 100644 --- a/python/pyarrow/tests/parquet/test_metadata.py +++ b/python/pyarrow/tests/parquet/test_metadata.py @@ -24,6 +24,8 @@ import pyarrow as pa from pyarrow.tests.parquet.common import _check_roundtrip, make_sample_file +from pyarrow.fs import LocalFileSystem +from pyarrow.tests import util try: import pyarrow.parquet as pq @@ -533,6 +535,41 @@ def test_metadata_exceeds_message_size(): metadata = pq.read_metadata(pa.BufferReader(buf)) +def test_metadata_schema_filesystem(tempdir): + table = pa.table({"a": [1, 2, 3]}) + + # URI writing to local file. + fname = "data.parquet" + file_path = str(tempdir / fname) + file_uri = 'file:///' + file_path + + pq.write_table(table, file_path) + + # Get expected `metadata` from path. + metadata = pq.read_metadata(tempdir / fname) + schema = table.schema + + assert pq.read_metadata(file_uri).equals(metadata) + assert pq.read_metadata( + file_path, filesystem=LocalFileSystem()).equals(metadata) + assert pq.read_metadata( + fname, filesystem=f'file:///{tempdir}').equals(metadata) + + assert pq.read_schema(file_uri).equals(schema) + assert pq.read_schema( + file_path, filesystem=LocalFileSystem()).equals(schema) + assert pq.read_schema( + fname, filesystem=f'file:///{tempdir}').equals(schema) + + with util.change_cwd(tempdir): + # Pass `filesystem` arg + assert pq.read_metadata( + fname, filesystem=LocalFileSystem()).equals(metadata) + + assert pq.read_schema( + fname, filesystem=LocalFileSystem()).equals(schema) + + def test_metadata_equals(): table = pa.table({"a": [1, 2, 3]}) with pa.BufferOutputStream() as out: diff --git a/python/pyarrow/tests/parquet/test_parquet_file.py b/python/pyarrow/tests/parquet/test_parquet_file.py index 9b528b1859a92..4fb9335961d05 100644 --- a/python/pyarrow/tests/parquet/test_parquet_file.py +++ b/python/pyarrow/tests/parquet/test_parquet_file.py @@ -17,6 +17,7 @@ import io import os +from unittest import mock import pytest @@ -277,3 +278,54 @@ def test_pre_buffer(pre_buffer): buf.seek(0) pf = pq.ParquetFile(buf, pre_buffer=pre_buffer) assert pf.read().num_rows == N + + +def test_parquet_file_explicitly_closed(tempdir): + """ + Unopened files should be closed explicitly after use, + and previously opened files should be left open. 
+ Applies to read_table, ParquetDataset, and ParquetFile + """ + # create test parquet file + fn = tempdir.joinpath('file.parquet') + table = pa.table({'col1': [0, 1], 'col2': [0, 1]}) + pq.write_table(table, fn) + + # read_table (legacy) with opened file (will leave open) + with open(fn, 'rb') as f: + pq.read_table(f, use_legacy_dataset=True) + assert not f.closed # Didn't close it internally after read_table + + # read_table (legacy) with unopened file (will close) + with mock.patch.object(pq.ParquetFile, "close") as mock_close: + pq.read_table(fn, use_legacy_dataset=True) + mock_close.assert_called() + + # ParquetDataset test (legacy) with unopened file (will close) + with mock.patch.object(pq.ParquetFile, "close") as mock_close: + pq.ParquetDataset(fn, use_legacy_dataset=True).read() + mock_close.assert_called() + + # ParquetDataset test (legacy) with opened file (will leave open) + with open(fn, 'rb') as f: + # ARROW-8075: support ParquetDataset from file-like, not just path-like + with pytest.raises(TypeError, match='not a path-like object'): + pq.ParquetDataset(f, use_legacy_dataset=True).read() + assert not f.closed + + # ParquetFile with opened file (will leave open) + with open(fn, 'rb') as f: + with pq.ParquetFile(f) as p: + p.read() + assert not f.closed + assert not p.closed + assert not f.closed # opened input file was not closed + assert not p.closed # parquet file obj reports as not closed + assert f.closed + assert p.closed # parquet file being closed reflects underlying file + + # ParquetFile with unopened file (will close) + with pq.ParquetFile(fn) as p: + p.read() + assert not p.closed + assert p.closed # parquet file obj reports as closed diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 2bdec412f1f82..f2820b6e25f4c 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -394,6 +394,14 @@ def test_mode_chunked_array(): assert len(pc.mode(arr)) == 0 +def test_empty_chunked_array(): + msg = "cannot construct ChunkedArray from empty vector and omitted type" + with pytest.raises(pa.ArrowInvalid, match=msg): + pa.chunked_array([]) + + pa.chunked_array([], type=pa.int8()) + + def test_variance(): data = [1, 2, 3, 4, 5, 6, 7, 8] assert pc.variance(data).as_py() == 5.25 diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index 1eb1d1b08ca8e..732164f9e132e 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -125,6 +125,14 @@ def test_infinite_iterator(): assert arr1.equals(expected) +def test_failing_iterator(): + with pytest.raises(ZeroDivisionError): + pa.array((1 // 0 for x in range(10))) + # ARROW-17253 + with pytest.raises(ZeroDivisionError): + pa.array((1 // 0 for x in range(10)), size=10) + + def _as_list(xs): return xs diff --git a/python/pyarrow/tests/test_cython.py b/python/pyarrow/tests/test_cython.py index 89fca4190ecf3..85d65383d8ba7 100644 --- a/python/pyarrow/tests/test_cython.py +++ b/python/pyarrow/tests/test_cython.py @@ -81,6 +81,8 @@ def check_cython_example_module(mod): mod.cast_scalar(scal, pa.list_(pa.int64())) +@pytest.mark.skipif(sys.platform == "win32", + reason="ARROW-17172: currently fails on windows") @pytest.mark.cython def test_cython_api(tmpdir): """ @@ -138,7 +140,6 @@ def test_cython_api(tmpdir): subprocess_env[var] = delim.join( pa.get_library_dirs() + [subprocess_env.get(var, '')] ) - subprocess.check_call([sys.executable, '-c', code], 
stdout=subprocess.PIPE, env=subprocess_env) diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index b900e694a91da..3dc9c3beb6ee1 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -4706,3 +4706,28 @@ def test_dataset_filter(tempdir): "colA": [1, 2], "col2": ["a", "b"] }) + + +def test_write_dataset_with_scanner_use_projected_schema(tempdir): + """ + Ensure the projected schema is used to validate partitions for scanner + + https://issues.apache.org/jira/browse/ARROW-17228 + """ + table = pa.table([pa.array(range(20))], names=["original_column"]) + table_dataset = ds.dataset(table) + columns = { + "renamed_column": ds.field("original_column"), + } + scanner = table_dataset.scanner(columns=columns) + + ds.write_dataset( + scanner, tempdir, partitioning=["renamed_column"], format="ipc") + with ( + pytest.raises( + KeyError, match=r"'Column original_column does not exist in schema" + ) + ): + ds.write_dataset( + scanner, tempdir, partitioning=["original_column"], format="ipc" + ) diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py index 05ebf4ed4c72f..94511445412a2 100644 --- a/python/pyarrow/tests/test_fs.py +++ b/python/pyarrow/tests/test_fs.py @@ -1093,7 +1093,9 @@ def test_gcs_options(): @pytest.mark.s3 def test_s3_options(): - from pyarrow.fs import S3FileSystem + from pyarrow.fs import (AwsDefaultS3RetryStrategy, + AwsStandardS3RetryStrategy, S3FileSystem, + S3RetryStrategy) fs = S3FileSystem(access_key='access', secret_key='secret', session_token='token', region='us-east-2', @@ -1107,6 +1109,15 @@ def test_s3_options(): assert isinstance(fs, S3FileSystem) assert pickle.loads(pickle.dumps(fs)) == fs + # Note that the retry strategy won't survive pickling for now + fs = S3FileSystem( + retry_strategy=AwsStandardS3RetryStrategy(max_attempts=5)) + assert isinstance(fs, S3FileSystem) + + fs = S3FileSystem( + retry_strategy=AwsDefaultS3RetryStrategy(max_attempts=5)) + assert isinstance(fs, S3FileSystem) + fs2 = S3FileSystem(role_arn='role') assert isinstance(fs2, S3FileSystem) assert pickle.loads(pickle.dumps(fs2)) == fs2 @@ -1131,6 +1142,15 @@ def test_s3_options(): assert isinstance(fs, S3FileSystem) assert pickle.loads(pickle.dumps(fs)) == fs + fs = S3FileSystem(request_timeout=0.5, connect_timeout=0.25) + assert isinstance(fs, S3FileSystem) + assert pickle.loads(pickle.dumps(fs)) == fs + + fs2 = S3FileSystem(request_timeout=0.25, connect_timeout=0.5) + assert isinstance(fs2, S3FileSystem) + assert pickle.loads(pickle.dumps(fs2)) == fs2 + assert fs2 != fs + with pytest.raises(ValueError): S3FileSystem(access_key='access') with pytest.raises(ValueError): @@ -1151,6 +1171,8 @@ def test_s3_options(): S3FileSystem(role_arn="arn", anonymous=True) with pytest.raises(ValueError): S3FileSystem(default_metadata=["foo", "bar"]) + with pytest.raises(ValueError): + S3FileSystem(retry_strategy=S3RetryStrategy()) @pytest.mark.s3 diff --git a/python/pyarrow/tests/test_gdb.py b/python/pyarrow/tests/test_gdb.py index 89c42648d249a..1990198d9f120 100644 --- a/python/pyarrow/tests/test_gdb.py +++ b/python/pyarrow/tests/test_gdb.py @@ -154,7 +154,7 @@ def select_frame(self, func_name): # but it's not available on old GDB versions (such as 8.1.1), # so instead parse the stack trace for a matching frame number. 
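Referring back to the S3FileSystem changes exercised in test_fs.py above: the new request_timeout/connect_timeout options and the retry strategy wrappers can be combined when constructing the filesystem. A minimal sketch, assuming pyarrow is built with S3 support; the region value is only a placeholder, and (as the test comment notes) the retry strategy does not survive pickling yet:

    from pyarrow.fs import AwsStandardS3RetryStrategy, S3FileSystem

    # Explicit connect/request timeouts are given in seconds; the standard
    # AWS retry strategy here is capped at 5 attempts.
    fs = S3FileSystem(
        region="us-east-2",
        connect_timeout=0.25,
        request_timeout=0.5,
        retry_strategy=AwsStandardS3RetryStrategy(max_attempts=5),
    )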
out = self.run_command("info stack") - pat = r"(?mi)^#(\d+)\s+.* in " + re.escape(func_name) + " " + pat = r"(?mi)^#(\d+)\s+.* in " + re.escape(func_name) + r"\b" m = re.search(pat, out) if m is None: pytest.fail(f"Could not select frame for function {func_name}") diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index f04ae23ce8039..a6488d70df53e 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -126,6 +126,50 @@ def test_python_file_read(): pa.PythonFile(StringIO(), mode='r') +@pytest.mark.parametrize("nbytes", (-1, 0, 1, 5, 100)) +@pytest.mark.parametrize("file_offset", (-1, 0, 5, 100)) +def test_python_file_get_stream(nbytes, file_offset): + + data = b'data1data2data3data4data5' + + f = pa.PythonFile(BytesIO(data), mode='r') + + # negative nbytes or offsets don't make sense here, raise ValueError + if nbytes < 0 or file_offset < 0: + with pytest.raises(pa.ArrowInvalid, + match="should be a positive value"): + f.get_stream(file_offset=file_offset, nbytes=nbytes) + f.close() + return + else: + stream = f.get_stream(file_offset=file_offset, nbytes=nbytes) + + # Subsequent calls to 'read' should match behavior if same + # data passed to BytesIO where get_stream should handle if + # nbytes/file_offset results in no bytes b/c out of bounds. + start = min(file_offset, len(data)) + end = min(file_offset + nbytes, len(data)) + buf = BytesIO(data[start:end]) + + # read some chunks + assert stream.read(nbytes=4) == buf.read(4) + assert stream.read(nbytes=6) == buf.read(6) + + # Read to end of each stream + assert stream.read() == buf.read() + + # Try reading past the stream + n = len(data) * 2 + assert stream.read(n) == buf.read(n) + + # NativeFile[CInputStream] is not seekable + with pytest.raises(OSError, match="seekable"): + stream.seek(0) + + stream.close() + assert stream.closed + + def test_python_file_read_at(): data = b'some sample data' @@ -675,6 +719,12 @@ def test_compression_level(compression): if not Codec.is_available(compression): pytest.skip("{} support is not built".format(compression)) + codec = Codec(compression) + if codec.name == "snappy": + assert codec.compression_level is None + else: + assert isinstance(codec.compression_level, int) + # These codecs do not support a compression level no_level = ['snappy'] if compression in no_level: diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py index 1b4aaaed6db29..b7192867dcf05 100644 --- a/python/pyarrow/tests/test_ipc.py +++ b/python/pyarrow/tests/test_ipc.py @@ -848,8 +848,8 @@ def run(self): connection.close() def get_result(self): - return(self._schema, self._table if self._do_read_all - else self._batches) + return (self._schema, self._table if self._do_read_all + else self._batches) class SocketStreamFixture(IpcFixture): diff --git a/python/pyarrow/tests/test_substrait.py b/python/pyarrow/tests/test_substrait.py index f05d68a95a14f..c8fa6afcb9ffa 100644 --- a/python/pyarrow/tests/test_substrait.py +++ b/python/pyarrow/tests/test_substrait.py @@ -145,3 +145,23 @@ def test_binary_conversion_with_json_options(tmpdir): res_tb = reader.read_all() assert table.select(["bar"]) == res_tb.select(["bar"]) + + +# Substrait has not finalized what the URI should be for standard functions +# In the meantime, lets just check the suffix +def has_function(fns, ext_file, fn_name): + suffix = f'{ext_file}#{fn_name}' + for fn in fns: + if fn.endswith(suffix): + return True + return False + + +def test_get_supported_functions(): + supported_functions = 
pa._substrait.get_supported_functions() + # It probably doesn't make sense to exhaustively verify this list but + # we can check a sample aggregate and a sample non-aggregate entry + assert has_function(supported_functions, + 'functions_arithmetic.yaml', 'add') + assert has_function(supported_functions, + 'functions_arithmetic.yaml', 'sum') diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py index 6474974b4fbd8..c0c60da6272f2 100644 --- a/python/pyarrow/tests/test_table.py +++ b/python/pyarrow/tests/test_table.py @@ -97,10 +97,7 @@ def test_chunked_array_construction(): assert len(arr) == 3 assert len(arr.chunks) == 2 - msg = ( - "When passing an empty collection of arrays you must also pass the " - "data type" - ) + msg = "cannot construct ChunkedArray from empty vector and omitted type" with pytest.raises(ValueError, match=msg): assert pa.chunked_array([]) @@ -118,6 +115,16 @@ def test_combine_chunks(): assert res.equals(expected) +def test_chunked_array_can_combine_chunks_with_no_chunks(): + # https://issues.apache.org/jira/browse/ARROW-17256 + assert pa.chunked_array([], type=pa.bool_()).combine_chunks() == pa.array( + [], type=pa.bool_() + ) + assert pa.chunked_array( + [pa.array([], type=pa.bool_())], type=pa.bool_() + ).combine_chunks() == pa.array([], type=pa.bool_()) + + def test_chunked_array_to_numpy(): data = pa.chunked_array([ [1, 2, 3], @@ -133,14 +140,15 @@ def test_chunked_array_to_numpy(): def test_chunked_array_mismatch_types(): - with pytest.raises(TypeError): + msg = "chunks must all be same type" + with pytest.raises(TypeError, match=msg): # Given array types are different pa.chunked_array([ pa.array([1, 2, 3]), pa.array([1., 2., 3.]) ]) - with pytest.raises(TypeError): + with pytest.raises(TypeError, match=msg): # Given array type is different from explicit type argument pa.chunked_array([pa.array([1, 2, 3])], type=pa.float64()) diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index 8cb7cea684274..0ef9f5a86ec6f 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -577,14 +577,24 @@ def test_struct_type(): assert ty['b'] == ty[2] + assert ty['b'] == ty.field('b') + + assert ty[2] == ty.field(2) + # Not found with pytest.raises(KeyError): ty['c'] + with pytest.raises(KeyError): + ty.field('c') + # Neither integer nor string with pytest.raises(TypeError): ty[None] + with pytest.raises(TypeError): + ty.field(None) + for a, b in zip(ty, fields): a == b @@ -634,6 +644,7 @@ def test_union_type(): def check_fields(ty, fields): assert ty.num_fields == len(fields) assert [ty[i] for i in range(ty.num_fields)] == fields + assert [ty.field(i) for i in range(ty.num_fields)] == fields fields = [pa.field('x', pa.list_(pa.int32())), pa.field('y', pa.binary())] diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 8407f95c984c3..d37363e06ff30 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -429,12 +429,23 @@ cdef class StructType(DataType): Examples -------- >>> import pyarrow as pa + + Accessing fields using direct indexing: + >>> struct_type = pa.struct({'x': pa.int32(), 'y': pa.string()}) >>> struct_type[0] pyarrow.Field >>> struct_type['y'] pyarrow.Field + Accessing fields using ``field()``: + + >>> struct_type.field(1) + pyarrow.Field + >>> struct_type.field('x') + pyarrow.Field + + # Creating a schema from the struct type's fields: >>> pa.schema(list(struct_type)) x: int32 y: string @@ -494,6 +505,41 @@ cdef class
StructType(DataType): """ return self.struct_type.GetFieldIndex(tobytes(name)) + def field(self, i): + """ + Select a field by its column name or numeric index. + + Parameters + ---------- + i : int or str + + Returns + ------- + pyarrow.Field + + Examples + -------- + + >>> import pyarrow as pa + >>> struct_type = pa.struct({'x': pa.int32(), 'y': pa.string()}) + + Select the second field: + + >>> struct_type.field(1) + pyarrow.Field + + Select the field named 'x': + + >>> struct_type.field('x') + pyarrow.Field + """ + if isinstance(i, (bytes, str)): + return self.field_by_name(i) + elif isinstance(i, int): + return DataType.field(self, i) + else: + raise TypeError('Expected integer or string index') + def get_all_field_indices(self, name): """ Return sorted list of indices for the fields with the given name. @@ -525,13 +571,10 @@ cdef class StructType(DataType): def __getitem__(self, i): """ Return the struct field with the given index or name. + + Alias of ``field``. """ - if isinstance(i, (bytes, str)): - return self.field_by_name(i) - elif isinstance(i, int): - return self.field(i) - else: - raise TypeError('Expected integer or string index') + return self.field(i) def __reduce__(self): return struct, (list(self),) @@ -579,9 +622,28 @@ cdef class UnionType(DataType): for i in range(len(self)): yield self[i] + def field(self, i): + """ + Return a child field by its numeric index. + + Parameters + ---------- + i : int + + Returns + ------- + pyarrow.Field + """ + if isinstance(i, int): + return DataType.field(self, i) + else: + raise TypeError('Expected integer') + def __getitem__(self, i): """ Return a child field by its index. + + Alias of ``field``. """ return self.field(i) @@ -2065,7 +2127,7 @@ cdef class Schema(_Weakrefable): Write schema to Buffer: >>> schema.serialize() - + """ cdef: shared_ptr[CBuffer] buffer diff --git a/python/setup.cfg b/python/setup.cfg index 9aaad4fa56de8..062ce2745d859 100644 --- a/python/setup.cfg +++ b/python/setup.cfg @@ -28,6 +28,8 @@ build-dir = doc/_build addopts = --ignore=scripts filterwarnings = error:The SparseDataFrame:FutureWarning +# Get a debug traceback when a test takes a really long time +faulthandler_timeout = 300 [pep8] ignore = E211,E225,E226,E227,E402,W504 diff --git a/python/setup.py b/python/setup.py index b572be1cee464..09623142dceb6 100755 --- a/python/setup.py +++ b/python/setup.py @@ -93,6 +93,7 @@ def build_extensions(self): _build_ext.build_extensions(self) def run(self): + self._run_cmake_pyarrow_cpp() self._run_cmake() _build_ext.run(self) @@ -227,6 +228,126 @@ def initialize_options(self): '_hdfsio', 'gandiva'] + def _run_cmake_pyarrow_cpp(self): + # check if build_type is correctly passed / set + if self.build_type.lower() not in ('release', 'debug'): + raise ValueError("--build-type (or PYARROW_BUILD_TYPE) needs to " + "be 'release' or 'debug'") + + # The directory containing this setup.py + source = os.path.dirname(os.path.abspath(__file__)) + # The directory containing this PyArrow cpp CMakeLists.txt + source_pyarrow_cpp = pjoin(source, "pyarrow/src") + + # The directory for the module being built + build_cmd = self.get_finalized_command('build') + saved_cwd = os.getcwd() + build_dir = pjoin(saved_cwd, 'build', 'dist') + build_include = pjoin(saved_cwd, 'build', 'dist', 'include') + build_lib = pjoin(os.getcwd(), build_cmd.build_lib) + + # The directory containing Arrow C++ build + arrow_build_dir = os.environ.get('ARROW_BUILD_DIR', 'build') + if self.inplace: + # a bit hacky + build_lib = saved_cwd + if not 
os.path.isdir(build_dir): + self.mkpath(build_dir) + if not os.path.isdir(build_lib): + self.mkpath(build_lib) + if not os.path.isdir(build_include): + self.mkpath(build_include) + + # Change to the build directory + with changed_dir(build_dir): + # cmake args + cmake_options = [ + '-DCMAKE_INSTALL_PREFIX=' + str(build_dir), + '-DCMAKE_BUILD_TYPE=' + str(self.build_type.lower()), + '-DARROW_BUILD_DIR=' + str(arrow_build_dir), + '-DPYTHON_EXECUTABLE=' + str(sys.executable), + '-DPython3_EXECUTABLE=' + str(sys.executable), + ] + + # Check for specific options + def append_cmake_bool(value, varname): + cmake_options.append('-D{0}={1}'.format( + varname, 'on' if value else 'off')) + + append_cmake_bool(self.with_dataset, 'PYARROW_WITH_DATASET') + append_cmake_bool(self.with_parquet_encryption, + 'PYARROW_WITH_PARQUET_ENCRYPTION') + append_cmake_bool(self.with_hdfs, + 'PYARROW_WITH_HDFS') + + # Windows + if self.cmake_generator: + cmake_options += ['-G', self.cmake_generator] + + # build args + build_tool_args = [] + if os.environ.get('PYARROW_PARALLEL'): + build_tool_args.append('--') + build_tool_args.append( + '-j{0}'.format(os.environ['PYARROW_PARALLEL'])) + + # run cmake + print("-- Running cmake for pyarrow cpp") + self.spawn(['cmake'] + cmake_options + [source_pyarrow_cpp]) + print("-- Finished cmake for pyarrow cpp") + # run make & install + print("-- Running cmake build and install for pyarrow cpp") + self.spawn(['cmake', '--build', '.', '--config', + self.build_type, '--target', 'install'] + + build_tool_args) + print("-- Finished cmake build and install for pyarrow cpp") + + # Move the libraries to the place expected by the Python build + try: + os.makedirs(pjoin(build_lib, 'pyarrow')) + except OSError: + pass + + # helper function + def copy_libs(folder_name): + for libname in os.listdir(pjoin(build_dir, folder_name)): + if "python" in libname: + libname_path = pjoin(build_lib, "pyarrow", libname) + if os.path.exists(libname_path): + os.remove(libname_path) + print( + f"Copying {pjoin(build_dir, folder_name, libname)}" + f" to {pjoin(build_lib, 'pyarrow', libname)}") + shutil.copy(pjoin(build_dir, folder_name, libname), + pjoin(build_lib, "pyarrow")) + + # Move libraries to python/pyarrow + # For windows builds, move dll from bin + try: + copy_libs("bin") + except OSError: + pass + try: + folder_name, = (name for name in ["lib", "lib64"] + if os.path.exists(pjoin(build_dir, name))) + copy_libs(folder_name) + except ValueError: + print("There are multiple or none libraries for PyArrow cpp ") + print("installed in the python/build/dist folder. 
Check the ") + print("installation process and be sure there is exactly one ") + print("library folder created") + + # Copy headers to python/pyarrow/include + pyarrow_cpp_include = pjoin(build_include, "arrow", "python") + pyarrow_include = pjoin( + build_lib, "pyarrow", "include", "arrow", "python") + if os.path.exists(pyarrow_include): + shutil.rmtree(pyarrow_include) + print( + f"Copying include folder: {pyarrow_cpp_include}" + f" to {pyarrow_include}") + shutil.copytree(pyarrow_cpp_include, pyarrow_include) + def _run_cmake(self): # check if build_type is correctly passed / set if self.build_type.lower() not in ('release', 'debug'): @@ -245,6 +366,10 @@ def _run_cmake(self): if not os.path.isdir(build_temp): self.mkpath(build_temp) + if self.inplace: + # a bit hacky + build_lib = saved_cwd + # Change to the build directory with changed_dir(build_temp): # Detect if we built elsewhere @@ -266,6 +391,7 @@ def _run_cmake(self): cmake_options = [ '-DPYTHON_EXECUTABLE=%s' % sys.executable, '-DPython3_EXECUTABLE=%s' % sys.executable, + '-DPYARROW_CPP_HOME=' + str(pjoin(build_lib, "pyarrow")), static_lib_option, ] @@ -332,10 +458,6 @@ def append_cmake_bool(value, varname): build_tool_args) print("-- Finished cmake --build for pyarrow") - if self.inplace: - # a bit hacky - build_lib = saved_cwd - # Move the libraries to the place expected by the Python build try: os.makedirs(pjoin(build_lib, 'pyarrow')) @@ -354,6 +476,16 @@ def append_cmake_bool(value, varname): shutil.move(pjoin(build_prefix, 'include'), pjoin(build_lib, 'pyarrow')) + # pyarrow/include file is first deleted in the previous step + # so we need to add the PyArrow cpp include folder again + build_pyarrow_cpp_include = pjoin( + saved_cwd, 'build/dist/include') + shutil.move(pjoin( + build_pyarrow_cpp_include, "arrow", "python"), + pjoin( + build_lib, "pyarrow", "include", + "arrow", "python")) + # Move the built C-extension to the place expected by the Python # build self._found_names = [] @@ -408,8 +540,6 @@ def _bundle_arrow_cpp(self, build_prefix, build_lib): move_shared_libs(build_prefix, build_lib, "arrow_substrait") if self.with_flight: move_shared_libs(build_prefix, build_lib, "arrow_flight") - move_shared_libs(build_prefix, build_lib, - "arrow_python_flight") if self.with_dataset: move_shared_libs(build_prefix, build_lib, "arrow_dataset") if self.with_plasma: @@ -567,7 +697,7 @@ def _move_shared_libs_unix(build_prefix, build_lib, lib_name): # If the event of not running from a git clone (e.g. from a git archive # or a Python sdist), see if we can set the version number ourselves -default_version = '9.0.0-SNAPSHOT' +default_version = '10.0.0-SNAPSHOT' if (not os.path.exists('../.git') and not os.environ.get('SETUPTOOLS_SCM_PRETEND_VERSION')): os.environ['SETUPTOOLS_SCM_PRETEND_VERSION'] = \ @@ -586,7 +716,7 @@ def parse_git(root, **kwargs): """ from setuptools_scm.git import parse kwargs['describe_command'] =\ - 'git describe --dirty --tags --long --match "apache-arrow-[0-9].*"' + 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"' return parse(root, **kwargs) @@ -622,9 +752,14 @@ def has_ext_modules(foo): if strtobool(os.environ.get('PYARROW_INSTALL_TESTS', '1')): packages = find_namespace_packages(include=['pyarrow*']) + exclude_package_data = {} else: packages = find_namespace_packages(include=['pyarrow*'], exclude=["pyarrow.tests*"]) + # setuptools adds back importable packages even when excluded. 
+ # https://github.com/pypa/setuptools/issues/3260 + # https://github.com/pypa/setuptools/issues/3340#issuecomment-1219383976 + exclude_package_data = {"pyarrow": ["tests*"]} setup( @@ -633,6 +768,7 @@ def has_ext_modules(foo): zip_safe=False, package_data={'pyarrow': ['*.pxd', '*.pyx', 'includes/*.pxd']}, include_package_data=True, + exclude_package_data=exclude_package_data, distclass=BinaryDistribution, # Dummy extension to trigger build_ext ext_modules=[Extension('__dummy__', sources=[])], diff --git a/r/DESCRIPTION b/r/DESCRIPTION index a7408d27d6548..95c1405869836 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -1,6 +1,6 @@ Package: arrow Title: Integration to 'Apache' 'Arrow' -Version: 8.0.0.9000 +Version: 9.0.0.9000 Authors@R: c( person("Neal", "Richardson", email = "neal@ursalabs.org", role = c("aut", "cre")), person("Ian", "Cook", email = "ianmcook@gmail.com", role = c("aut")), @@ -98,6 +98,7 @@ Collate: 'dplyr-distinct.R' 'dplyr-eval.R' 'dplyr-filter.R' + 'dplyr-funcs-augmented.R' 'dplyr-funcs-conditional.R' 'dplyr-funcs-datetime.R' 'dplyr-funcs-math.R' diff --git a/r/NEWS.md b/r/NEWS.md index c2ad7f86ddba6..c0bad9458d1d3 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -17,7 +17,9 @@ under the License. --> -# arrow 8.0.0.9000 +# arrow 9.0.0.9000 + +# arrow 9.0.0 ## Arrow dplyr queries diff --git a/r/R/compute.R b/r/R/compute.R index 0985e73a5f2d3..636c9146ca37b 100644 --- a/r/R/compute.R +++ b/r/R/compute.R @@ -344,7 +344,7 @@ cast_options <- function(safe = TRUE, ...) { #' @return `NULL`, invisibly #' @export #' -#' @examplesIf arrow_with_dataset() +#' @examplesIf arrow_with_dataset() && identical(Sys.getenv("NOT_CRAN"), "true") #' library(dplyr, warn.conflicts = FALSE) #' #' some_model <- lm(mpg ~ disp + cyl, data = mtcars) @@ -385,6 +385,13 @@ register_scalar_function <- function(name, fun, in_type, out_type, update_cache = TRUE ) + # User-defined functions require some special handling + # in the query engine which currently require an opt-in using + # the R_ARROW_COLLECT_WITH_UDF environment variable while this + # behaviour is stabilized. + # TODO(ARROW-17178) remove the need for this! + Sys.setenv(R_ARROW_COLLECT_WITH_UDF = "true") + invisible(NULL) } diff --git a/r/R/dataset-write.R b/r/R/dataset-write.R index 496aaad205c39..e0181ee74f715 100644 --- a/r/R/dataset-write.R +++ b/r/R/dataset-write.R @@ -34,8 +34,9 @@ #' use the current `group_by()` columns. #' @param basename_template string template for the names of files to be written. #' Must contain `"{i}"`, which will be replaced with an autoincremented -#' integer to generate basenames of datafiles. For example, `"part-{i}.feather"` -#' will yield `"part-0.feather", ...`. +#' integer to generate basenames of datafiles. For example, `"part-{i}.arrow"` +#' will yield `"part-0.arrow", ...`. +#' If not specified, it defaults to `"part-{i}."`. #' @param hive_style logical: write partition segments as Hive-style #' (`key1=value1/key2=value2/file.ext`) or as just bare values. Default is `TRUE`. #' @param existing_data_behavior The behavior to use when there is already data @@ -133,6 +134,9 @@ write_dataset <- function(dataset, max_rows_per_group = bitwShiftL(1, 20), ...) 
{ format <- match.arg(format) + if (format %in% c("feather", "ipc")) { + format <- "arrow" + } if (inherits(dataset, "arrow_dplyr_query")) { # partitioning vars need to be in the `select` schema dataset <- ensure_group_vars(dataset) diff --git a/r/R/dataset.R b/r/R/dataset.R index 12765fbfc0295..d86962cc1dcec 100644 --- a/r/R/dataset.R +++ b/r/R/dataset.R @@ -224,6 +224,7 @@ open_dataset <- function(sources, # and not handle_parquet_io_error() error = function(e, call = caller_env(n = 4)) { handle_parquet_io_error(e, format, call) + abort(conditionMessage(e), call = call) } ) } diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R index 3e83475a8c849..8049e46eb5db9 100644 --- a/r/R/dplyr-collect.R +++ b/r/R/dplyr-collect.R @@ -25,6 +25,8 @@ collect.arrow_dplyr_query <- function(x, as_data_frame = TRUE, ...) { # and not handle_csv_read_error() error = function(e, call = caller_env(n = 4)) { handle_csv_read_error(e, x$.data$schema, call) + handle_augmented_field_misuse(e, call) + abort(conditionMessage(e), call = call) } ) @@ -104,10 +106,18 @@ add_suffix <- function(fields, common_cols, suffix) { } implicit_schema <- function(.data) { + # Get the source data schema so that we can evaluate expressions to determine + # the output schema. Note that we don't use source_data() because we only + # want to go one level up (where we may have called implicit_schema() before) .data <- ensure_group_vars(.data) old_schm <- .data$.data$schema + # Add in any augmented fields that may exist in the query but not in the + # real data, in case we have FieldRefs to them + old_schm[["__filename"]] <- string() if (is.null(.data$aggregations)) { + # .data$selected_columns is a named list of Expressions (FieldRefs or + # something more complex). Bind them in order to determine their output type new_fields <- map(.data$selected_columns, ~ .$type(old_schm)) if (!is.null(.data$join) && !(.data$join$type %in% JoinType[1:4])) { # Add cols from right side, except for semi/anti joins @@ -128,6 +138,7 @@ implicit_schema <- function(.data) { new_fields <- c(left_fields, right_fields) } } else { + # The output schema is based on the aggregations and any group_by vars new_fields <- map(summarize_projection(.data), ~ .$type(old_schm)) # * Put group_by_vars first (this can't be done by summarize, # they have to be last per the aggregate node signature, diff --git a/r/R/dplyr-funcs-augmented.R b/r/R/dplyr-funcs-augmented.R new file mode 100644 index 0000000000000..6e751d49f61d5 --- /dev/null +++ b/r/R/dplyr-funcs-augmented.R @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +register_bindings_augmented <- function() { + register_binding("add_filename", function() { + Expression$field_ref("__filename") + }) +} diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index c1dcdd17744c4..4dadff54b48e1 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -151,6 +151,7 @@ create_binding_cache <- function() { register_bindings_math() register_bindings_string() register_bindings_type() + register_bindings_augmented() # We only create the cache for nse_funcs and not agg_funcs .cache$functions <- c(as.list(nse_funcs), arrow_funcs) diff --git a/r/R/dplyr.R b/r/R/dplyr.R index dd6340c4f52b2..dffe269199c86 100644 --- a/r/R/dplyr.R +++ b/r/R/dplyr.R @@ -110,6 +110,9 @@ make_field_refs <- function(field_names) { #' @export print.arrow_dplyr_query <- function(x, ...) { schm <- x$.data$schema + # If we are using this augmented field, it won't be in the schema + schm[["__filename"]] <- string() + types <- map_chr(x$selected_columns, function(expr) { name <- expr$field_name if (nzchar(name)) { diff --git a/r/R/duckdb.R b/r/R/duckdb.R index b924dafcabe69..379062ce3a1ba 100644 --- a/r/R/duckdb.R +++ b/r/R/duckdb.R @@ -47,9 +47,9 @@ #' #' ds %>% #' filter(mpg < 30) %>% -#' to_duckdb() %>% #' group_by(cyl) %>% -#' summarize(mean_mpg = mean(mpg, na.rm = TRUE)) +#' to_duckdb() %>% +#' slice_min(disp) to_duckdb <- function(.data, con = arrow_duck_connection(), table_name = unique_arrow_tablename(), diff --git a/r/R/flight.R b/r/R/flight.R index f56308f958495..0bd661e58d565 100644 --- a/r/R/flight.R +++ b/r/R/flight.R @@ -56,9 +56,11 @@ flight_disconnect <- function(client) { #' @param overwrite logical: if `path` exists on `client` already, should we #' replace it with the contents of `data`? Default is `TRUE`; if `FALSE` and #' `path` exists, the function will error. +#' @param max_chunksize integer: Maximum size for RecordBatch chunks when a `data.frame` is sent. +#' Individual chunks may be smaller depending on the chunk layout of individual columns. #' @return `client`, invisibly. #' @export -flight_put <- function(client, data, path, overwrite = TRUE) { +flight_put <- function(client, data, path, overwrite = TRUE, max_chunksize = NULL) { assert_is(data, c("data.frame", "Table", "RecordBatch")) if (!overwrite && flight_path_exists(client, path)) { @@ -70,8 +72,13 @@ flight_put <- function(client, data, path, overwrite = TRUE) { py_data <- reticulate::r_to_py(data) writer <- client$do_put(descriptor_for_path(path), py_data$schema)[[1]] - if (inherits(data, "RecordBatch")) { + if (inherits(data, "RecordBatch") && !is.null(max_chunksize)) { + warning("`max_chunksize` is not supported for flight_put with RecordBatch") writer$write_batch(py_data) + } else if (inherits(data, "RecordBatch")) { + writer$write_batch(py_data) + } else if (!is.null(max_chunksize)) { + writer$write_table(py_data, max_chunksize) } else { writer$write_table(py_data) } diff --git a/r/R/query-engine.R b/r/R/query-engine.R index 84360490fdbe7..c132b291b872b 100644 --- a/r/R/query-engine.R +++ b/r/R/query-engine.R @@ -142,12 +142,14 @@ ExecPlan <- R6Class("ExecPlan", } } else { # If any columns are derived, reordered, or renamed we need to Project - # If there are aggregations, the projection was already handled above + # If there are aggregations, the projection was already handled above. 
# We have to project at least once to eliminate some junk columns # that the ExecPlan adds: # __fragment_index, __batch_index, __last_in_fragment - # Presumably extraneous repeated projection of the same thing - # (as when we've done collapse() and not projected after) is cheap/no-op + # + # $Project() will check whether we actually need to project, so that + # repeated projection of the same thing + # (as when we've done collapse() and not projected after) is avoided projection <- c(.data$selected_columns, .data$temp_columns) node <- node$Project(projection) if (!is.null(.data$join)) { @@ -349,7 +351,11 @@ ExecNode <- R6Class("ExecNode", Project = function(cols) { if (length(cols)) { assert_is_list_of(cols, "Expression") - self$preserve_extras(ExecNode_Project(self, cols, names(cols))) + if (needs_projection(cols, self$schema)) { + self$preserve_extras(ExecNode_Project(self, cols, names(cols))) + } else { + self + } } else { self$preserve_extras(ExecNode_Project(self, character(0), character(0))) } @@ -402,3 +408,13 @@ do_exec_plan_substrait <- function(substrait_plan) { plan <- ExecPlan$create() ExecPlan_run_substrait(plan, substrait_plan) } + +needs_projection <- function(projection, schema) { + # Check whether `projection` would do anything to data with the given `schema` + field_names <- set_names(map_chr(projection, ~ .$field_name), NULL) + + # We need to apply `projection` if: + !all(nzchar(field_names)) || # Any of the Expressions are not FieldRefs + !identical(field_names, names(projection)) || # Any fields are renamed + !identical(field_names, names(schema)) # The fields are reordered +} diff --git a/r/R/table.R b/r/R/table.R index 5579c676d5157..d7e276415c5cd 100644 --- a/r/R/table.R +++ b/r/R/table.R @@ -331,5 +331,12 @@ as_arrow_table.arrow_dplyr_query <- function(x, ...) { # See query-engine.R for ExecPlan/Nodes plan <- ExecPlan$create() final_node <- plan$Build(x) - plan$Run(final_node, as_table = TRUE) + + run_with_event_loop <- identical( + Sys.getenv("R_ARROW_COLLECT_WITH_UDF", ""), + "true" + ) + + result <- plan$Run(final_node, as_table = run_with_event_loop) + as_arrow_table(result) } diff --git a/r/R/type.R b/r/R/type.R index 14234131a53f3..d4d7d52ad580d 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -58,6 +58,8 @@ FLOAT_TYPES <- c("float16", "float32", "float64", "halffloat", "float", "double" #' Infer the arrow Array type from an R object #' +#' [type()] is deprecated in favor of [infer_type()]. +#' #' @param x an R object (usually a vector) to be converted to an [Array] or #' [ChunkedArray]. #' @param ... Passed to S3 methods diff --git a/r/R/util.R b/r/R/util.R index 55ff29db73aaf..eef69d0244148 100644 --- a/r/R/util.R +++ b/r/R/util.R @@ -134,6 +134,10 @@ read_compressed_error <- function(e) { stop(e) } +# This function was refactored in ARROW-15260 to only raise an error if +# the appropriate string was found and so errors must be raised manually after +# calling this if matching error not found +# TODO: Refactor as part of ARROW-17355 to prevent potential missed errors handle_parquet_io_error <- function(e, format, call) { msg <- conditionMessage(e) if (grepl("Parquet magic bytes not found in footer", msg) && length(format) > 1 && is_character(format)) { @@ -143,8 +147,8 @@ handle_parquet_io_error <- function(e, format, call) { msg, i = "Did you mean to specify a 'format' other than the default (parquet)?" 
) + abort(msg, call = call) + } +} + +# This function only raises an error if +# the appropriate string was found and so errors must be raised manually after +# calling this if matching error not found +# TODO: Refactor as part of ARROW-17355 to prevent potential missed errors handle_csv_read_error <- function(e, schema, call) { msg <- conditionMessage(e) @@ -217,8 +225,27 @@ handle_csv_read_error <- function(e, schema, call) { "header being read in as data." ) ) + abort(msg, call = call) + } +} + +# This function only raises an error if +# the appropriate string was found and so errors must be raised manually after +# calling this if matching error not found +# TODO: Refactor as part of ARROW-17355 to prevent potential missed errors +handle_augmented_field_misuse <- function(e, call) { + msg <- conditionMessage(e) + if (grepl("No match for FieldRef.Name(__filename)", msg, fixed = TRUE)) { + msg <- c( + msg, + i = paste( + "`add_filename()` or use of the `__filename` augmented field can only", + "be used with Dataset objects, and can only be added before doing", + "an aggregation or a join." + ) + ) + abort(msg, call = call) } - abort(msg, call = call) } is_compressed <- function(compression) { diff --git a/r/man/flight_put.Rd b/r/man/flight_put.Rd index 13a8da16fead5..c306b0f7bb9e0 100644 --- a/r/man/flight_put.Rd +++ b/r/man/flight_put.Rd @@ -4,7 +4,7 @@ \alias{flight_put} \title{Send data to a Flight server} \usage{ -flight_put(client, data, path, overwrite = TRUE) +flight_put(client, data, path, overwrite = TRUE, max_chunksize = NULL) } \arguments{ \item{client}{\code{pyarrow.flight.FlightClient}, as returned by \code{\link[=flight_connect]{flight_connect()}}} @@ -16,6 +16,9 @@ flight_put(client, data, path, overwrite = TRUE) \item{overwrite}{logical: if \code{path} exists on \code{client} already, should we replace it with the contents of \code{data}? Default is \code{TRUE}; if \code{FALSE} and \code{path} exists, the function will error.} + +\item{max_chunksize}{integer: Maximum size for RecordBatch chunks when a \code{data.frame} is sent. +Individual chunks may be smaller depending on the chunk layout of individual columns.} } \value{ \code{client}, invisibly. diff --git a/r/man/infer_type.Rd b/r/man/infer_type.Rd index 88d6179e49b5e..1bba27255678b 100644 --- a/r/man/infer_type.Rd +++ b/r/man/infer_type.Rd @@ -19,7 +19,7 @@ type(x) An arrow \link[=data-type]{data type} } \description{ -Infer the arrow Array type from an R object +\code{\link[=type]{type()}} is deprecated in favor of \code{\link[=infer_type]{infer_type()}}. } \examples{ infer_type(1:10) diff --git a/r/man/register_scalar_function.Rd b/r/man/register_scalar_function.Rd index 4da8f54f645b0..324dd5fad1f58 100644 --- a/r/man/register_scalar_function.Rd +++ b/r/man/register_scalar_function.Rd @@ -48,7 +48,7 @@ stateless and return output with the same shape (i.e., the same number of rows) as the input.
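A hedged sketch of the calling pattern implied by the comments on handle_csv_read_error() and handle_augmented_field_misuse() above: each helper now aborts only when its message pattern matches, so the caller is expected to chain the helpers and re-raise the original error itself. collect_with_hints() and its arguments are illustrative stand-ins, not the actual arrow call site.

library(rlang)

collect_with_hints <- function(query, format = "parquet", schema = NULL) {
  tryCatch(
    dplyr::collect(query),
    error = function(e) {
      call <- caller_env()
      # Each handler aborts with an added hint only if it recognizes the message
      handle_parquet_io_error(e, format, call)
      handle_csv_read_error(e, schema, call)
      handle_augmented_field_misuse(e, call)
      # No handler matched, so re-raise the original error manually
      abort(conditionMessage(e), call = call)
    }
  )
}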
} \examples{ -\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (arrow_with_dataset() && identical(Sys.getenv("NOT_CRAN"), "true")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(dplyr, warn.conflicts = FALSE) some_model <- lm(mpg ~ disp + cyl, data = mtcars) diff --git a/r/man/to_duckdb.Rd b/r/man/to_duckdb.Rd index 79c089239b2a8..0ff2aa4404c70 100644 --- a/r/man/to_duckdb.Rd +++ b/r/man/to_duckdb.Rd @@ -47,8 +47,8 @@ ds <- InMemoryDataset$create(mtcars) ds \%>\% filter(mpg < 30) \%>\% - to_duckdb() \%>\% group_by(cyl) \%>\% - summarize(mean_mpg = mean(mpg, na.rm = TRUE)) + to_duckdb() \%>\% + slice_min(disp) \dontshow{\}) # examplesIf} } diff --git a/r/man/write_dataset.Rd b/r/man/write_dataset.Rd index 8fc07d5cc790a..1bc940697cc4a 100644 --- a/r/man/write_dataset.Rd +++ b/r/man/write_dataset.Rd @@ -38,8 +38,9 @@ use the current \code{group_by()} columns.} \item{basename_template}{string template for the names of files to be written. Must contain \code{"{i}"}, which will be replaced with an autoincremented -integer to generate basenames of datafiles. For example, \code{"part-{i}.feather"} -will yield \verb{"part-0.feather", ...}.} +integer to generate basenames of datafiles. For example, \code{"part-{i}.arrow"} +will yield \verb{"part-0.arrow", ...}. +If not specified, it defaults to \code{"part-{i}."}.} \item{hive_style}{logical: write partition segments as Hive-style (\code{key1=value1/key2=value2/file.ext}) or as just bare values. Default is \code{TRUE}.} diff --git a/r/pkgdown/assets/versions.json b/r/pkgdown/assets/versions.json index 1e54cbd21a48c..f1ddd85f055e3 100644 --- a/r/pkgdown/assets/versions.json +++ b/r/pkgdown/assets/versions.json @@ -1,12 +1,16 @@ [ { - "name": "8.0.0.9000 (dev)", + "name": "9.0.0.9000 (dev)", "version": "dev/" }, { - "name": "8.0.0 (release)", + "name": "9.0.0 (release)", "version": "" }, + { + "name": "8.0.0", + "version": "8.0/" + }, { "name": "7.0.0", "version": "7.0/" diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index 91d646f0a3c08..f9183a3a10303 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -222,8 +222,7 @@ std::shared_ptr ExecNode_Scan( options->dataset_schema = dataset->schema(); - // ScanNode needs the filter to do predicate pushdown and skip partitions - options->filter = ValueOrStop(filter->Bind(*dataset->schema())); + options->filter = *filter; // ScanNode needs to know which fields to materialize (and which are unnecessary) std::vector exprs; @@ -232,9 +231,8 @@ std::shared_ptr ExecNode_Scan( } options->projection = - ValueOrStop(call("make_struct", std::move(exprs), - compute::MakeStructOptions{std::move(materialized_field_names)}) - .Bind(*dataset->schema())); + call("make_struct", std::move(exprs), + compute::MakeStructOptions{std::move(materialized_field_names)}); return MakeExecNodeOrStop("scan", plan.get(), {}, ds::ScanNodeOptions{dataset, options}); diff --git a/r/tests/testthat/helper-skip.R b/r/tests/testthat/helper-skip.R index fd1ce1a76c325..7a6c2687ed895 100644 --- a/r/tests/testthat/helper-skip.R +++ b/r/tests/testthat/helper-skip.R @@ -22,7 +22,7 @@ build_features <- c( ) force_tests <- function() { - identical(tolower(Sys.getenv("ARROW_R_force_tests()")), "true") + identical(tolower(Sys.getenv("ARROW_R_FORCE_TESTS")), "true") } skip_if_not_available <- function(feature) { diff --git a/r/tests/testthat/test-compute.R b/r/tests/testthat/test-compute.R index 9e487169f4b15..5821c0fa2df1c 
100644 --- a/r/tests/testthat/test-compute.R +++ b/r/tests/testthat/test-compute.R @@ -81,6 +81,9 @@ test_that("arrow_scalar_function() works with auto_convert = TRUE", { test_that("register_scalar_function() adds a compute function to the registry", { skip_if_not(CanRunWithCapturedR()) + # TODO(ARROW-17178): User-defined function-friendly ExecPlan execution has + # occasional valgrind errors + skip_on_linux_devel() register_scalar_function( "times_32", @@ -88,7 +91,11 @@ test_that("register_scalar_function() adds a compute function to the registry", int32(), float64(), auto_convert = TRUE ) - on.exit(unregister_binding("times_32", update_cache = TRUE)) + on.exit({ + unregister_binding("times_32", update_cache = TRUE) + # TODO(ARROW-17178) remove the need for this! + Sys.unsetenv("R_ARROW_COLLECT_WITH_UDF") + }) expect_true("times_32" %in% names(asNamespace("arrow")$.cache$functions)) expect_true("times_32" %in% list_compute_functions()) @@ -120,9 +127,11 @@ test_that("arrow_scalar_function() with bad return type errors", { int32(), float64() ) - on.exit( + on.exit({ unregister_binding("times_32_bad_return_type_array", update_cache = TRUE) - ) + # TODO(ARROW-17178) remove the need for this! + Sys.unsetenv("R_ARROW_COLLECT_WITH_UDF") + }) expect_error( call_function("times_32_bad_return_type_array", Array$create(1L)), @@ -135,9 +144,11 @@ test_that("arrow_scalar_function() with bad return type errors", { int32(), float64() ) - on.exit( + on.exit({ unregister_binding("times_32_bad_return_type_scalar", update_cache = TRUE) - ) + # TODO(ARROW-17178) remove the need for this! + Sys.unsetenv("R_ARROW_COLLECT_WITH_UDF") + }) expect_error( call_function("times_32_bad_return_type_scalar", Array$create(1L)), @@ -145,7 +156,7 @@ test_that("arrow_scalar_function() with bad return type errors", { ) }) -test_that("register_user_defined_function() can register multiple kernels", { +test_that("register_scalar_function() can register multiple kernels", { skip_if_not(CanRunWithCapturedR()) register_scalar_function( @@ -155,7 +166,11 @@ test_that("register_user_defined_function() can register multiple kernels", { out_type = function(in_types) in_types[[1]], auto_convert = TRUE ) - on.exit(unregister_binding("times_32", update_cache = TRUE)) + on.exit({ + unregister_binding("times_32", update_cache = TRUE) + # TODO(ARROW-17178) remove the need for this! + Sys.unsetenv("R_ARROW_COLLECT_WITH_UDF") + }) expect_equal( call_function("times_32", Scalar$create(1L, int32())), @@ -173,7 +188,10 @@ test_that("register_user_defined_function() can register multiple kernels", { ) }) -test_that("register_user_defined_function() errors for unsupported specifications", { +test_that("register_scalar_function() errors for unsupported specifications", { + # TODO(ARROW-17178) remove the need for this! 
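To make the repeated R_ARROW_COLLECT_WITH_UDF clean-up in these tests easier to follow, here is a hedged end-to-end sketch of registering an R scalar UDF and collecting a query that calls it. It assumes registration leaves R_ARROW_COLLECT_WITH_UDF set (which is why the on.exit() blocks above unset it); the name times_32 mirrors the tests.

library(arrow)
library(dplyr)

register_scalar_function(
  "times_32",
  function(context, x) x * 32,
  int32(),
  float64(),
  auto_convert = TRUE
)

record_batch(a = 1:3) %>%
  mutate(b = times_32(a)) %>%
  collect()

# As in the tests, clean up afterwards (assumes registration set the env var)
Sys.unsetenv("R_ARROW_COLLECT_WITH_UDF")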
+ on.exit(Sys.unsetenv("R_ARROW_COLLECT_WITH_UDF")) + expect_error( register_scalar_function( "no_kernels", @@ -208,7 +226,10 @@ test_that("register_user_defined_function() errors for unsupported specification test_that("user-defined functions work during multi-threaded execution", { skip_if_not(CanRunWithCapturedR()) skip_if_not_available("dataset") - # Snappy has a UBSan issue: https://github.com/google/snappy/pull/148 + # Skip on linux devel because: + # TODO(ARROW-17283): Snappy has a UBSan issue that is fixed in the dev version + # TODO(ARROW-17178): User-defined function-friendly ExecPlan execution has + # occasional valgrind errors skip_on_linux_devel() n_rows <- 10000 @@ -235,7 +256,11 @@ test_that("user-defined functions work during multi-threaded execution", { float64(), auto_convert = TRUE ) - on.exit(unregister_binding("times_32", update_cache = TRUE)) + on.exit({ + unregister_binding("times_32", update_cache = TRUE) + # TODO(ARROW-17178) remove the need for this! + Sys.unsetenv("R_ARROW_COLLECT_WITH_UDF") + }) # check a regular collect() result <- open_dataset(tf_dataset) %>% @@ -268,7 +293,11 @@ test_that("user-defined error when called from an unsupported context", { float64(), auto_convert = TRUE ) - on.exit(unregister_binding("times_32", update_cache = TRUE)) + on.exit({ + unregister_binding("times_32", update_cache = TRUE) + # TODO(ARROW-17178) remove the need for this! + Sys.unsetenv("R_ARROW_COLLECT_WITH_UDF") + }) stream_plan_with_udf <- function() { record_batch(a = 1:1000) %>% diff --git a/r/tests/testthat/test-dataset-write.R b/r/tests/testthat/test-dataset-write.R index 2f4ff7e649e88..7a5f861ca579d 100644 --- a/r/tests/testthat/test-dataset-write.R +++ b/r/tests/testthat/test-dataset-write.R @@ -63,7 +63,7 @@ test_that("Writing a dataset: CSV->IPC", { # Check whether "int" is present in the files or just in the dirs first <- read_feather( - dir(dst_dir, pattern = ".feather$", recursive = TRUE, full.names = TRUE)[1], + dir(dst_dir, pattern = ".arrow$", recursive = TRUE, full.names = TRUE)[1], as_data_frame = FALSE ) # It shouldn't be there @@ -139,6 +139,40 @@ test_that("Writing a dataset: Parquet->Parquet (default)", { ) }) +test_that("Writing a dataset: `basename_template` default behavior", { + ds <- open_dataset(csv_dir, partitioning = "part", format = "csv") + + dst_dir <- make_temp_dir() + write_dataset(ds, dst_dir, format = "parquet", max_rows_per_file = 5L) + expect_identical( + dir(dst_dir, full.names = FALSE, recursive = TRUE), + paste0("part-", 0:3, ".parquet") + ) + dst_dir <- make_temp_dir() + write_dataset(ds, dst_dir, format = "parquet", basename_template = "{i}.data", max_rows_per_file = 5L) + expect_identical( + dir(dst_dir, full.names = FALSE, recursive = TRUE), + paste0(0:3, ".data") + ) + dst_dir <- make_temp_dir() + expect_error( + write_dataset(ds, dst_dir, format = "parquet", basename_template = "part-i.parquet"), + "basename_template did not contain '\\{i\\}'" + ) + feather_dir <- make_temp_dir() + write_dataset(ds, feather_dir, format = "feather", partitioning = "int") + expect_identical( + dir(feather_dir, full.names = FALSE, recursive = TRUE), + sort(paste(paste("int", c(1:10, 101:110), sep = "="), "part-0.arrow", sep = "/")) + ) + ipc_dir <- make_temp_dir() + write_dataset(ds, ipc_dir, format = "ipc", partitioning = "int") + expect_identical( + dir(ipc_dir, full.names = FALSE, recursive = TRUE), + sort(paste(paste("int", c(1:10, 101:110), sep = "="), "part-0.arrow", sep = "/")) + ) +}) + +test_that("Writing a dataset: existing data
behavior", { # This test does not work on Windows because unlink does not immediately # delete the data. @@ -458,8 +492,10 @@ test_that("Writing a dataset: CSV format options", { test_that("Dataset writing: unsupported features/input validation", { skip_if_not_available("parquet") expect_error(write_dataset(4), "You must supply a") - expect_error(write_dataset(data.frame(x = 1, x = 2, check.names = FALSE)), - "Field names must be unique") + expect_error( + write_dataset(data.frame(x = 1, x = 2, check.names = FALSE)), + "Field names must be unique" + ) ds <- open_dataset(hive_dir) expect_error( diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R index d43bb492d020f..d9512ef94f3d7 100644 --- a/r/tests/testthat/test-dataset.R +++ b/r/tests/testthat/test-dataset.R @@ -1131,7 +1131,6 @@ test_that("dataset to C-interface to arrow_dplyr_query with proj/filter", { delete_arrow_array_stream(stream_ptr) }) - test_that("Filter parquet dataset with is.na ARROW-15312", { ds_path <- make_temp_dir() @@ -1349,3 +1348,96 @@ test_that("FileSystemFactoryOptions input validation", { fixed = TRUE ) }) + +test_that("can add in augmented fields", { + ds <- open_dataset(hive_dir) + + observed <- ds %>% + mutate(file_name = add_filename()) %>% + collect() + + expect_named( + observed, + c("int", "dbl", "lgl", "chr", "fct", "ts", "group", "other", "file_name") + ) + + expect_equal( + sort(unique(observed$file_name)), + list.files(hive_dir, full.names = TRUE, recursive = TRUE) + ) + + error_regex <- paste( + "`add_filename()` or use of the `__filename` augmented field can only", + "be used with Dataset objects, and can only be added before doing", + "an aggregation or a join." + ) + + # errors appropriately with ArrowTabular objects + expect_error( + arrow_table(mtcars) %>% + mutate(file = add_filename()) %>% + collect(), + regexp = error_regex, + fixed = TRUE + ) + + # errors appropriately with aggregation + expect_error( + ds %>% + summarise(max_int = max(int)) %>% + mutate(file_name = add_filename()) %>% + collect(), + regexp = error_regex, + fixed = TRUE + ) + + # joins to tables + another_table <- select(example_data, int, dbl2) + expect_error( + ds %>% + left_join(another_table, by = "int") %>% + mutate(file = add_filename()) %>% + collect(), + regexp = error_regex, + fixed = TRUE + ) + + # and on joins to datasets + another_dataset <- write_dataset(another_table, "another_dataset") + expect_error( + ds %>% + left_join(open_dataset("another_dataset"), by = "int") %>% + mutate(file = add_filename()) %>% + collect(), + regexp = error_regex, + fixed = TRUE + ) + + # this hits the implicit_schema path by joining afterwards + join_after <- ds %>% + mutate(file = add_filename()) %>% + left_join(open_dataset("another_dataset"), by = "int") %>% + collect() + + expect_named( + join_after, + c("int", "dbl", "lgl", "chr", "fct", "ts", "group", "other", "file", "dbl2") + ) + + expect_equal( + sort(unique(join_after$file)), + list.files(hive_dir, full.names = TRUE, recursive = TRUE) + ) + + # another test on the explicit_schema path + summarise_after <- ds %>% + mutate(file = add_filename()) %>% + group_by(file) %>% + summarise(max_int = max(int)) %>% + collect() + + expect_equal( + sort(summarise_after$file), + list.files(hive_dir, full.names = TRUE, recursive = TRUE) + ) +}) diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R index 3c121780da64d..f1b4f9cea3a46 100644 --- a/r/tests/testthat/test-dplyr-collapse.R +++ 
b/r/tests/testthat/test-dplyr-collapse.R @@ -242,3 +242,39 @@ test_that("query_on_dataset handles collapse()", { select(int) )) }) + +test_that("collapse doesn't unnecessarily add ProjectNodes", { + plan <- capture.output( + tab %>% + collapse() %>% + collapse() %>% + show_query() + ) + # There should be no projections + expect_length(grep("ProjectNode", plan), 0) + + plan <- capture.output( + tab %>% + select(int, chr) %>% + collapse() %>% + collapse() %>% + show_query() + ) + # There should be just one projection + expect_length(grep("ProjectNode", plan), 1) + + skip_if_not_available("dataset") + # We need one ProjectNode on dataset queries to handle augmented fields + + tf <- tempfile() + write_dataset(tab, tf, partitioning = "lgl") + ds <- open_dataset(tf) + + plan <- capture.output( + ds %>% + collapse() %>% + collapse() %>% + show_query() + ) + expect_length(grep("ProjectNode", plan), 1) +}) diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index 423fe1ccd8ea4..9eb7bc62db84e 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -905,7 +905,7 @@ test_that("str_like", { ) # This will give an error until a new version of stringr with str_like has been released - skip_if_not(packageVersion("stringr") > "1.4.0") + skip_if_not(packageVersion("stringr") > "1.5.0") compare_dplyr_binding( .input %>% mutate(x = str_like(x, "%baz%")) %>% diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R index 37ab178cbb40f..1a5b6ec8a7c76 100644 --- a/r/tests/testthat/test-dplyr-query.R +++ b/r/tests/testthat/test-dplyr-query.R @@ -448,9 +448,9 @@ test_that("show_exec_plan(), show_query() and explain()", { arrow_table() %>% show_exec_plan(), regexp = paste0( - "ExecPlan with .* nodes:.*", # boiler plate for ExecPlan - "ProjectNode.*", # output columns - "TableSourceNode" # entry point + "ExecPlan with 2 nodes:.*", # boiler plate for ExecPlan + "SinkNode.*", # output + "TableSourceNode" # entry point ) ) @@ -463,12 +463,12 @@ test_that("show_exec_plan(), show_query() and explain()", { mutate(int_plus_ten = int + 10) %>% show_exec_plan(), regexp = paste0( - "ExecPlan with .* nodes:.*", # boiler plate for ExecPlan - "chr, int, lgl, \"int_plus_ten\".*", # selected columns - "FilterNode.*", # filter node - "(dbl > 2).*", # filter expressions + "ExecPlan with .* nodes:.*", # boiler plate for ExecPlan + "chr, int, lgl, \"int_plus_ten\".*", # selected columns + "FilterNode.*", # filter node + "(dbl > 2).*", # filter expressions "chr != \"e\".*", - "TableSourceNode" # entry point + "TableSourceNode" # entry point ) ) @@ -481,11 +481,11 @@ test_that("show_exec_plan(), show_query() and explain()", { mutate(int_plus_ten = int + 10) %>% show_exec_plan(), regexp = paste0( - "ExecPlan with .* nodes:.*", # boiler plate for ExecPlan - "chr, int, lgl, \"int_plus_ten\".*", # selected columns - "(dbl > 2).*", # the filter expressions + "ExecPlan with .* nodes:.*", # boiler plate for ExecPlan + "chr, int, lgl, \"int_plus_ten\".*", # selected columns + "(dbl > 2).*", # the filter expressions "chr != \"e\".*", - "TableSourceNode" # the entry point" + "TableSourceNode" # the entry point" ) ) @@ -497,13 +497,13 @@ test_that("show_exec_plan(), show_query() and explain()", { summarise(avg = mean(dbl, na.rm = TRUE)) %>% show_exec_plan(), regexp = paste0( - "ExecPlan with .* nodes:.*", # boiler plate for ExecPlan - "ProjectNode.*", # output columns - "GroupByNode.*", # the group_by statement - 
"keys=.*lgl.*", # the key for the aggregations - "aggregates=.*hash_mean.*avg.*", # the aggregations - "ProjectNode.*", # the input columns - "TableSourceNode" # the entry point + "ExecPlan with .* nodes:.*", # boiler plate for ExecPlan + "ProjectNode.*", # output columns + "GroupByNode.*", # the group_by statement + "keys=.*lgl.*", # the key for the aggregations + "aggregates=.*hash_mean.*avg.*", # the aggregations + "ProjectNode.*", # the input columns + "TableSourceNode" # the entry point ) ) @@ -521,14 +521,13 @@ test_that("show_exec_plan(), show_query() and explain()", { select(int, verses, doubled_dbl) %>% show_exec_plan(), regexp = paste0( - "ExecPlan with .* nodes:.*", # boiler plate for ExecPlan - "ProjectNode.*", # output columns - "HashJoinNode.*", # the join - "ProjectNode.*", # input columns for the second table + "ExecPlan with .* nodes:.*", # boiler plate for ExecPlan + "ProjectNode.*", # output columns + "HashJoinNode.*", # the join + "ProjectNode.*", # input columns for the second table "\"doubled_dbl\"\\: multiply_checked\\(dbl, 2\\).*", # mutate - "TableSourceNode.*", # second table - "ProjectNode.*", # input columns for the first table - "TableSourceNode" # first table + "TableSourceNode.*", # second table + "TableSourceNode" # first table ) ) @@ -539,11 +538,10 @@ test_that("show_exec_plan(), show_query() and explain()", { arrange(desc(wt)) %>% show_exec_plan(), regexp = paste0( - "ExecPlan with .* nodes:.*", # boiler plate for ExecPlan + "ExecPlan with .* nodes:.*", # boiler plate for ExecPlan "OrderBySinkNode.*wt.*DESC.*", # arrange goes via the OrderBy sink node - "ProjectNode.*", # output columns - "FilterNode.*", # filter node - "TableSourceNode.*" # entry point + "FilterNode.*", # filter node + "TableSourceNode.*" # entry point ) ) @@ -559,3 +557,27 @@ test_that("show_exec_plan(), show_query() and explain()", { "The `ExecPlan` cannot be printed for a nested query." 
) }) + +test_that("needs_projection unit tests", { + tab <- Table$create(tbl) + # Wrapper to simplify tests + query_needs_projection <- function(query) { + needs_projection(query$selected_columns, tab$schema) + } + expect_false(query_needs_projection(as_adq(tab))) + expect_false(query_needs_projection( + tab %>% collapse() %>% collapse() + )) + expect_true(query_needs_projection( + tab %>% mutate(int = int + 2) + )) + expect_true(query_needs_projection( + tab %>% select(int, chr) + )) + expect_true(query_needs_projection( + tab %>% rename(int2 = int) + )) + expect_true(query_needs_projection( + tab %>% relocate(lgl) + )) +}) diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index f799fcbf38487..0ee0c5739dbb6 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -243,8 +243,10 @@ test_that("n_distinct() with many batches", { write_parquet(dplyr::starwars, tf, chunk_size = 20) ds <- open_dataset(tf) - expect_equal(ds %>% summarise(n_distinct(sex, na.rm = FALSE)) %>% collect(), - ds %>% collect() %>% summarise(n_distinct(sex, na.rm = FALSE))) + expect_equal( + ds %>% summarise(n_distinct(sex, na.rm = FALSE)) %>% collect(), + ds %>% collect() %>% summarise(n_distinct(sex, na.rm = FALSE)) + ) }) test_that("n_distinct() on dataset", { @@ -1089,3 +1091,38 @@ test_that("summarise() supports namespacing", { tbl ) }) + +test_that("We don't add unnecessary ProjectNodes when aggregating", { + tab <- Table$create(tbl) + + # Wrapper to simplify the tests + expect_project_nodes <- function(query, n) { + plan <- capture.output(query %>% show_query()) + expect_length(grep("ProjectNode", plan), n) + } + + # 1 Projection: select int as `mean(int)` before aggregation + expect_project_nodes( + tab %>% summarize(mean(int)), + 1 + ) + + # 0 Projections only if + # (a) input only contains the col you're aggregating, and + # (b) the output col name is the same as the input name, and + # (c) no grouping + expect_project_nodes( + tab[, "int"] %>% summarize(int = mean(int, na.rm = TRUE)), + 0 + ) + + # 2 projections: one before, and one after in order to put grouping cols first + expect_project_nodes( + tab %>% group_by(lgl) %>% summarize(mean(int)), + 2 + ) + expect_project_nodes( + tab %>% count(lgl), + 2 + ) +}) diff --git a/r/tests/testthat/test-python-flight.R b/r/tests/testthat/test-python-flight.R index 6fdf38f815ba6..d2b6fd491e189 100644 --- a/r/tests/testthat/test-python-flight.R +++ b/r/tests/testthat/test-python-flight.R @@ -38,6 +38,20 @@ if (process_is_running("demo_flight_server")) { ) }) + test_that("flight_put with max_chunksize", { + flight_put(client, example_data, path = flight_obj, max_chunksize = 1) + expect_true(flight_path_exists(client, flight_obj)) + expect_true(flight_obj %in% list_flights(client)) + expect_warning( + flight_put(client, record_batch(example_data), path = flight_obj, max_chunksize = 123), + regexp = "`max_chunksize` is not supported for flight_put with RecordBatch" + ) + expect_error( + flight_put(client, Array$create(c(1:3)), path = flight_obj), + regexp = 'data must be a "data.frame", "Table", or "RecordBatch"' + ) + }) + test_that("flight_get", { expect_identical(as.data.frame(flight_get(client, flight_obj)), example_data) }) diff --git a/r/tools/autobrew b/r/tools/autobrew index ea46be2c0d19c..55fe9576086f4 100644 --- a/r/tools/autobrew +++ b/r/tools/autobrew @@ -19,8 +19,15 @@ export HOMEBREW_NO_ANALYTICS=1 export HOMEBREW_NO_AUTO_UPDATE=1 -# Official Homebrew no longer supports 
El-Capitan -UPSTREAM_ORG="autobrew" +if [[ ${OSTYPE:6} -ge 20 ]]; then + # We are on a modern enough macOS, we can use the real brew + UPSTREAM_ORG="homebrew" + PKG_BREW_NAME="$PKG_BREW_NAME-static" +else + # Official Homebrew no longer supports El-Capitan + # so we need to use the forked autobrew version of brew that supports old macOSes + UPSTREAM_ORG="autobrew" +fi if [ "$DISABLE_AUTOBREW" ]; then return 0; fi AUTOBREW=${TMPDIR-/tmp} @@ -36,6 +43,13 @@ curl -fsSL https://github.com/$UPSTREAM_ORG/brew/tarball/master | tar xz --strip export HOMEBREW_CACHE="$AUTOBREW" LOCAL_FORMULA="tools/${PKG_BREW_NAME}.rb" if [ -f "$LOCAL_FORMULA" ]; then + if [[ ${OSTYPE:6} -ge 20 ]]; then + $BREW tap + + # Tap https://github.com/autobrew/homebrew-cran so that we can get dependencies from there + $BREW tap autobrew/cran + fi + # Use the local brew formula and install --HEAD $BREW deps -n "$LOCAL_FORMULA" 2>/dev/null BREW_DEPS=$($BREW deps -n "$LOCAL_FORMULA" 2>/dev/null) diff --git a/r/vignettes/dataset.Rmd b/r/vignettes/dataset.Rmd index 1a969f979c62b..0890d36ff42e0 100644 --- a/r/vignettes/dataset.Rmd +++ b/r/vignettes/dataset.Rmd @@ -126,7 +126,7 @@ For more information on the usage of these parameters, see `?read_delim_arrow()` `open_dataset()` was able to automatically infer column values for `year` and `month` --which are not present in the data files--based on the directory structure. The -Hive-style partitioning structure is self-describing, with file paths like +[Hive](https://hive.apache.org/)-style partitioning structure is self-describing, with file paths like ``` year=2009/month=1/data.parquet @@ -185,20 +185,6 @@ month: int32 ") ``` -The other form of partitioning currently supported is [Hive](https://hive.apache.org/)-style, -in which the partition variable names are included in the path segments. -If you had saved your files in paths like: - -``` -year=2009/month=01/data.parquet -year=2009/month=02/data.parquet -... -``` - -you would not have had to provide the names in `partitioning`; -you could have just called `ds <- open_dataset("nyc-taxi")` and the partitions -would have been detected automatically. - ## Querying the dataset Up to this point, you haven't loaded any data. You've walked directories to find diff --git a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb index d65eec037199c..080f08d608f01 100644 --- a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb +++ b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb @@ -16,7 +16,7 @@ # under the License. module ArrowCUDA - VERSION = "9.0.0-SNAPSHOT" + VERSION = "10.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb index 9df0d72993923..ce942c0150f64 100644 --- a/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb +++ b/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb @@ -16,7 +16,7 @@ # under the License. module ArrowDataset - VERSION = "9.0.0-SNAPSHOT" + VERSION = "10.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb b/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb index 2f27664b4e842..7cd5dc287f9d8 100644 --- a/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb +++ b/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb @@ -16,7 +16,7 @@ # under the License. 
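A hedged usage sketch for the new max_chunksize argument to flight_put() exercised in the test above (the port and path are placeholders, and a Flight server such as the demo_flight_server used by these tests is assumed to already be running):

library(arrow)

client <- flight_connect(port = 8089)

# A data.frame (or Table) is written in RecordBatch chunks of at most 1000 rows;
# passing max_chunksize with a RecordBatch input instead warns and ignores it.
flight_put(client, mtcars, path = "demo/mtcars", max_chunksize = 1000)
flight_get(client, "demo/mtcars")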
module ArrowFlightSQL - VERSION = "9.0.0-SNAPSHOT" + VERSION = "10.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow-flight/lib/arrow-flight/version.rb b/ruby/red-arrow-flight/lib/arrow-flight/version.rb index a1fc6b8501669..ebc39547b5f80 100644 --- a/ruby/red-arrow-flight/lib/arrow-flight/version.rb +++ b/ruby/red-arrow-flight/lib/arrow-flight/version.rb @@ -16,7 +16,7 @@ # under the License. module ArrowFlight - VERSION = "9.0.0-SNAPSHOT" + VERSION = "10.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow/lib/arrow/version.rb b/ruby/red-arrow/lib/arrow/version.rb index 0b9d43e242dd9..8ec135984dd80 100644 --- a/ruby/red-arrow/lib/arrow/version.rb +++ b/ruby/red-arrow/lib/arrow/version.rb @@ -16,7 +16,7 @@ # under the License. module Arrow - VERSION = "9.0.0-SNAPSHOT" + VERSION = "10.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-gandiva/lib/gandiva/version.rb b/ruby/red-gandiva/lib/gandiva/version.rb index ca2ff5473c28f..1c7a7e9592ff4 100644 --- a/ruby/red-gandiva/lib/gandiva/version.rb +++ b/ruby/red-gandiva/lib/gandiva/version.rb @@ -16,7 +16,7 @@ # under the License. module Gandiva - VERSION = "9.0.0-SNAPSHOT" + VERSION = "10.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-parquet/lib/parquet/version.rb b/ruby/red-parquet/lib/parquet/version.rb index 749a27746662d..2ae31fd435b9a 100644 --- a/ruby/red-parquet/lib/parquet/version.rb +++ b/ruby/red-parquet/lib/parquet/version.rb @@ -16,7 +16,7 @@ # under the License. module Parquet - VERSION = "9.0.0-SNAPSHOT" + VERSION = "10.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-plasma/lib/plasma/version.rb b/ruby/red-plasma/lib/plasma/version.rb index dbe6fe07c3b33..596e3a87f75d0 100644 --- a/ruby/red-plasma/lib/plasma/version.rb +++ b/ruby/red-plasma/lib/plasma/version.rb @@ -16,7 +16,7 @@ # under the License. module Plasma - VERSION = "9.0.0-SNAPSHOT" + VERSION = "10.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-")

{@link #getConnectStringPrefix} * the URL prefix accepted by this driver, i.e., - * {@code "jdbc:arrow-flight://"} + * {@code "jdbc:arrow-flight-sql://"} *