# FEAT-#6284: Do not convert HDK query execution result to arrow. (#269)
name: ci
on:
  pull_request:
    paths:
      # NOTE: keep these paths in sync with the paths that trigger the
      # fuzzydata Github Actions in .github/workflows/fuzzydata-test.yml
      - .github/workflows/**
      - '!.github/workflows/push-to-master.yml'
      - asv_bench/**
      - modin/**
      - requirements/**
      - scripts/**
      - environment-dev.yml
      - requirements-dev.txt
      - setup.cfg
      - setup.py
      - versioneer.py
  # Run on every push as well (no path filter).
  push:
concurrency:
  # Cancel other jobs in the same branch. We don't care whether CI passes
  # on old commits.
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
env:
  # Tells Modin's test suite it is running under GitHub CI.
  MODIN_GITHUB_CI: true
jobs:
  # Check code formatting with black; fails on any diff.
  lint-black:
    name: lint (black)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 1
      - uses: actions/setup-python@v4
        with:
          python-version: "3.8.x"
          architecture: "x64"
      - run: pip install black
      # NOTE: keep the black command here in sync with the pre-commit hook in
      # /contributing/pre-commit
      - run: black --check --diff modin/ asv_bench/benchmarks scripts/doc_checker.py
lint-mypy: | |
name: lint (mypy) | |
runs-on: ubuntu-latest | |
steps: | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 1 | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: "3.8.x" | |
architecture: "x64" | |
- run: pip install -r requirements-dev.txt | |
- run: mypy --config-file mypy.ini | |
lint-pydocstyle: | |
if: github.event_name == 'pull_request' | |
name: lint (pydocstyle) | |
runs-on: ubuntu-latest | |
steps: | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 1 | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: "3.8.x" | |
architecture: "x64" | |
# The `numpydoc` version here MUST match the versions in the dev requirements files. | |
- run: pip install pytest pytest-cov pydocstyle numpydoc==1.1.0 xgboost | |
- run: python -m pytest scripts/test | |
- run: pip install -e ".[all]" | |
- run: | | |
python scripts/doc_checker.py --add-ignore=D101,D102,D103,D105 --disable-numpydoc \ | |
modin/pandas/dataframe.py modin/pandas/series.py \ | |
modin/pandas/groupby.py \ | |
modin/pandas/series_utils.py modin/pandas/general.py \ | |
modin/pandas/plotting.py modin/pandas/utils.py \ | |
modin/pandas/iterator.py modin/pandas/indexing.py \ | |
- run: python scripts/doc_checker.py modin/core/dataframe | |
- run: python scripts/doc_checker.py modin/core/execution/dask | |
- run: | | |
python scripts/doc_checker.py \ | |
modin/pandas/accessor.py modin/pandas/general.py \ | |
modin/pandas/groupby.py modin/pandas/indexing.py \ | |
modin/pandas/iterator.py modin/pandas/plotting.py \ | |
modin/pandas/series_utils.py modin/pandas/utils.py \ | |
modin/pandas/base.py \ | |
modin/pandas/io.py \ | |
asv_bench/benchmarks/utils \ | |
asv_bench/benchmarks/__init__.py asv_bench/benchmarks/io/__init__.py \ | |
asv_bench/benchmarks/scalability/__init__.py \ | |
modin/core/io \ | |
modin/experimental/core/execution/ray/implementations/pandas_on_ray \ | |
modin/experimental/core/execution/ray/implementations/pyarrow_on_ray \ | |
modin/pandas/series.py \ | |
modin/core/execution/python \ | |
modin/pandas/dataframe.py \ | |
modin/config/__init__.py \ | |
modin/config/__main__.py \ | |
modin/config/envvars.py \ | |
modin/config/pubsub.py | |
- run: python scripts/doc_checker.py modin/distributed | |
- run: python scripts/doc_checker.py modin/utils.py | |
- run: python scripts/doc_checker.py modin/experimental/sklearn | |
- run: | | |
python scripts/doc_checker.py modin/experimental/xgboost/__init__.py \ | |
modin/experimental/xgboost/utils.py modin/experimental/xgboost/xgboost.py \ | |
modin/experimental/xgboost/xgboost_ray.py | |
- run: python scripts/doc_checker.py modin/core/execution/ray | |
- run: | | |
python scripts/doc_checker.py modin/core/execution/dispatching/factories/factories.py \ | |
modin/core/execution/dispatching/factories/dispatcher.py \ | |
- run: python scripts/doc_checker.py scripts/doc_checker.py | |
- run: | | |
python scripts/doc_checker.py modin/experimental/pandas/io.py \ | |
modin/experimental/pandas/numpy_wrap.py modin/experimental/pandas/__init__.py | |
- run: python scripts/doc_checker.py modin/core/storage_formats/base | |
- run: python scripts/doc_checker.py modin/experimental/core/storage_formats/pyarrow | |
- run: python scripts/doc_checker.py modin/core/storage_formats/pandas | |
- run: | | |
python scripts/doc_checker.py \ | |
modin/experimental/core/execution/native/implementations/hdk_on_native/dataframe \ | |
modin/experimental/core/execution/native/implementations/hdk_on_native/io \ | |
modin/experimental/core/execution/native/implementations/hdk_on_native/partitioning \ | |
modin/experimental/core/execution/native/implementations/hdk_on_native/calcite_algebra.py \ | |
modin/experimental/core/execution/native/implementations/hdk_on_native/calcite_builder.py \ | |
modin/experimental/core/execution/native/implementations/hdk_on_native/calcite_serializer.py \ | |
modin/experimental/core/execution/native/implementations/hdk_on_native/df_algebra.py \ | |
modin/experimental/core/execution/native/implementations/hdk_on_native/expr.py \ | |
modin/experimental/core/execution/native/implementations/hdk_on_native/hdk_worker.py \ | |
- run: python scripts/doc_checker.py modin/experimental/core/storage_formats/hdk | |
- run: python scripts/doc_checker.py modin/experimental/core/execution/native/implementations/hdk_on_native/interchange/dataframe_protocol | |
- run: python scripts/doc_checker.py modin/experimental/batch/pipeline.py | |
- run: python scripts/doc_checker.py modin/logging | |
lint-flake8: | |
name: lint (flake8) | |
runs-on: ubuntu-latest | |
steps: | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 1 | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: "3.8.x" | |
architecture: "x64" | |
# NOTE: If you are changing the set of packages installed here, make sure that | |
# the dev requirements match them. | |
- run: pip install flake8 flake8-print flake8-no-implicit-concat | |
# NOTE: keep the flake8 command here in sync with the pre-commit hook in | |
# /contributing/pre-commit | |
- run: flake8 modin/ asv_bench/benchmarks scripts/doc_checker.py | |
test-api: | |
runs-on: ubuntu-latest | |
name: test api | |
defaults: | |
run: | |
# `shell: bash -l {0}` - special way to activate modin environment | |
shell: bash -l {0} | |
steps: | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 1 | |
- name: Cache conda | |
uses: actions/cache@v3 | |
with: | |
path: | | |
~/conda_pkgs_dir | |
~/.cache/pip | |
key: | |
${{ runner.os }}-conda-${{ hashFiles('environment-dev.yml') }} | |
- uses: conda-incubator/setup-miniconda@v2 | |
with: | |
miniforge-variant: Mambaforge | |
miniforge-version: latest | |
use-mamba: true | |
activate-environment: modin | |
environment-file: environment-dev.yml | |
python-version: 3.8 | |
channel-priority: strict | |
# we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed | |
# for more info see https://github.com/conda-incubator/setup-miniconda/issues/264 | |
use-only-tar-bz2: false | |
- name: Conda environment | |
run: | | |
conda info | |
conda list | |
- run: sudo apt update && sudo apt install -y libhdf5-dev | |
- name: Api tests | |
run: python -m pytest modin/pandas/test/test_api.py | |
- name: Executions Api tests | |
run: python -m pytest modin/test/test_executions_api.py | |
test-headers: | |
runs-on: ubuntu-latest | |
name: test-headers | |
defaults: | |
run: | |
shell: bash -l {0} | |
steps: | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 1 | |
- name: Cache conda | |
uses: actions/cache@v3 | |
with: | |
path: | | |
~/conda_pkgs_dir | |
~/.cache/pip | |
key: | |
${{ runner.os }}-conda-${{ hashFiles('environment-dev.yml') }} | |
- uses: conda-incubator/setup-miniconda@v2 | |
with: | |
miniforge-variant: Mambaforge | |
miniforge-version: latest | |
use-mamba: true | |
activate-environment: modin | |
environment-file: environment-dev.yml | |
python-version: 3.8 | |
channel-priority: strict | |
# we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed | |
# for more info see https://github.com/conda-incubator/setup-miniconda/issues/264 | |
use-only-tar-bz2: false | |
- name: Conda environment | |
run: | | |
conda info | |
conda list | |
- name: Headers tests | |
run: python -m pytest modin/test/test_headers.py | |
test-clean-install-ubuntu: | |
needs: [lint-flake8, lint-black, lint-mypy, test-api, test-headers] | |
runs-on: ubuntu-latest | |
defaults: | |
run: | |
shell: bash -l {0} | |
name: test-clean-install-ubuntu | |
steps: | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 1 | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: "3.8.x" | |
architecture: "x64" | |
- name: Clean install and run | |
run: | | |
python -m pip install -e ".[all]" | |
MODIN_ENGINE=dask python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))" | |
MODIN_ENGINE=ray python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))" | |
MODIN_ENGINE=unidist UNIDIST_BACKEND=mpi mpiexec -n 1 python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))" | |
test-clean-install-windows: | |
needs: [lint-flake8, lint-black, lint-mypy, test-api, test-headers] | |
runs-on: windows-latest | |
defaults: | |
run: | |
shell: bash -l {0} | |
name: test-clean-install-windows | |
steps: | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 1 | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: "3.8.x" | |
architecture: "x64" | |
- name: Clean install and run | |
run: | | |
python -m pip install -e ".[all]" | |
MODIN_ENGINE=dask python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))" | |
MODIN_ENGINE=ray python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))" | |
MODIN_ENGINE=unidist UNIDIST_BACKEND=mpi mpiexec -n 1 python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))" | |
test-internals: | |
needs: [lint-flake8, lint-black, lint-mypy, test-api, test-headers] | |
runs-on: ubuntu-latest | |
defaults: | |
run: | |
shell: bash -l {0} | |
name: test-internals | |
steps: | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 1 | |
- name: Cache conda | |
uses: actions/cache@v3 | |
with: | |
path: | | |
~/conda_pkgs_dir | |
~/.cache/pip | |
key: | |
${{ runner.os }}-conda-${{ hashFiles('environment-dev.yml') }} | |
- uses: conda-incubator/setup-miniconda@v2 | |
with: | |
miniforge-variant: Mambaforge | |
miniforge-version: latest | |
use-mamba: true | |
activate-environment: modin | |
environment-file: environment-dev.yml | |
python-version: 3.8 | |
channel-priority: strict | |
# we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed | |
# for more info see https://github.com/conda-incubator/setup-miniconda/issues/264 | |
use-only-tar-bz2: false | |
- name: Conda environment | |
run: | | |
conda info | |
conda list | |
- name: Internals tests | |
run: python -m pytest modin/core/execution/dispatching/factories/test/test_dispatcher.py modin/experimental/cloud/test/test_cloud.py | |
- run: python -m pytest modin/config/test | |
- run: python -m pytest modin/test/test_envvar_catcher.py | |
- run: python -m pytest modin/test/storage_formats/base/test_internals.py | |
- run: python -m pytest modin/test/storage_formats/pandas/test_internals.py | |
- run: python -m pytest modin/test/test_envvar_npartitions.py | |
- run: python -m pytest modin/test/test_utils.py | |
- run: python -m pytest asv_bench/test/test_utils.py | |
- run: python -m pytest modin/test/interchange/dataframe_protocol/base | |
- run: python -m pytest modin/test/test_logging.py | |
- uses: ./.github/workflows/upload-coverage | |
test-no-engine: | |
runs-on: ubuntu-latest | |
defaults: | |
run: | |
shell: bash -l {0} | |
steps: | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 1 | |
- name: Cache conda | |
uses: actions/cache@v3 | |
with: | |
path: | | |
~/conda_pkgs_dir | |
~/.cache/pip | |
key: | |
${{ runner.os }}-conda-${{ hashFiles('requirements-no-engine.yml') }} | |
- uses: conda-incubator/setup-miniconda@v2 | |
with: | |
miniforge-variant: Mambaforge | |
miniforge-version: latest | |
use-mamba: true | |
activate-environment: modin | |
environment-file: requirements/requirements-no-engine.yml | |
python-version: 3.8 | |
channel-priority: strict | |
# we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed | |
# for more info see https://github.com/conda-incubator/setup-miniconda/issues/264 | |
use-only-tar-bz2: false | |
- name: Conda environment | |
run: | | |
conda info | |
conda list | |
- run: python -m pytest modin/core/execution/dispatching/factories/test/test_dispatcher.py::test_add_option | |
- uses: ./.github/workflows/upload-coverage | |
test-defaults: | |
needs: [lint-flake8, lint-black, lint-mypy, test-api, test-headers] | |
runs-on: ubuntu-latest | |
defaults: | |
run: | |
shell: bash -l {0} | |
strategy: | |
matrix: | |
execution: [BaseOnPython] | |
env: | |
MODIN_TEST_DATASET_SIZE: "small" | |
name: Test ${{ matrix.execution }} execution, Python 3.8 | |
steps: | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 2 | |
- name: Cache conda | |
uses: actions/cache@v3 | |
with: | |
path: | | |
~/conda_pkgs_dir | |
~/.cache/pip | |
key: | |
${{ runner.os }}-conda-${{ hashFiles('environment-dev.yml') }} | |
- uses: conda-incubator/setup-miniconda@v2 | |
with: | |
miniforge-variant: Mambaforge | |
miniforge-version: latest | |
use-mamba: true | |
activate-environment: modin | |
environment-file: environment-dev.yml | |
python-version: 3.8 | |
channel-priority: strict | |
# we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed | |
# for more info see https://github.com/conda-incubator/setup-miniconda/issues/264 | |
use-only-tar-bz2: false | |
- name: Conda environment | |
run: | | |
conda info | |
conda list | |
- name: Install HDF5 | |
run: sudo apt update && sudo apt install -y libhdf5-dev | |
- run: python -m pytest modin/experimental/xgboost/test/test_default.py --execution=${{ matrix.execution }} | |
- run: python -m pytest -n 2 modin/test/storage_formats/base/test_internals.py --execution=${{ matrix.execution }} | |
- run: python -m pytest -n 2 modin/pandas/test/dataframe/test_binary.py --execution=${{ matrix.execution }} | |
- run: python -m pytest -n 2 modin/pandas/test/dataframe/test_default.py --execution=${{ matrix.execution }} | |
- run: python -m pytest -n 2 modin/pandas/test/dataframe/test_indexing.py --execution=${{ matrix.execution }} | |
- run: python -m pytest -n 2 modin/pandas/test/dataframe/test_iter.py --execution=${{ matrix.execution }} | |
- run: python -m pytest -n 2 modin/pandas/test/dataframe/test_join_sort.py --execution=${{ matrix.execution }} | |
- run: python -m pytest -n 2 modin/pandas/test/dataframe/test_map_metadata.py --execution=${{ matrix.execution }} | |
- run: python -m pytest -n 2 modin/pandas/test/dataframe/test_reduce.py --execution=${{ matrix.execution }} | |
- run: python -m pytest -n 2 modin/pandas/test/dataframe/test_udf.py --execution=${{ matrix.execution }} | |
- run: python -m pytest -n 2 modin/pandas/test/dataframe/test_window.py --execution=${{ matrix.execution }} | |
- run: python -m pytest -n 2 modin/pandas/test/dataframe/test_pickle.py --execution=${{ matrix.execution }} | |
- run: python -m pytest -n 2 modin/pandas/test/test_series.py --execution=${{ matrix.execution }} | |
- run: python -m pytest -n 2 modin/pandas/test/test_rolling.py --execution=${{ matrix.execution }} | |
- run: python -m pytest -n 2 modin/pandas/test/test_expanding.py --execution=${{ matrix.execution }} | |
- run: python -m pytest -n 2 modin/pandas/test/test_concat.py --execution=${{ matrix.execution }} | |
- run: python -m pytest -n 2 modin/pandas/test/test_groupby.py --execution=${{ matrix.execution }} | |
- run: MODIN_EXPERIMENTAL_GROUPBY=1 python -m pytest -n 2 modin/pandas/test/test_groupby.py --execution=${{ matrix.execution }} | |
- run: python -m pytest -n 2 modin/pandas/test/test_reshape.py --execution=${{ matrix.execution }} | |
- run: python -m pytest -n 2 modin/pandas/test/test_general.py --execution=${{ matrix.execution }} | |
- uses: ./.github/workflows/upload-coverage | |
test-hdk: | |
needs: [lint-flake8, lint-black, lint-mypy, test-api, test-headers] | |
runs-on: ubuntu-latest | |
defaults: | |
run: | |
shell: bash -l {0} | |
env: | |
MODIN_EXPERIMENTAL: "True" | |
MODIN_ENGINE: "native" | |
MODIN_STORAGE_FORMAT: "hdk" | |
name: Test HDK storage format, Python 3.8 | |
services: | |
moto: | |
image: motoserver/moto | |
ports: | |
- 5000:5000 | |
env: | |
AWS_ACCESS_KEY_ID: foobar_key | |
AWS_SECRET_ACCESS_KEY: foobar_secret | |
steps: | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 2 | |
- name: Cache conda | |
uses: actions/cache@v3 | |
with: | |
path: | | |
~/conda_pkgs_dir | |
~/.cache/pip | |
key: | |
${{ runner.os }}-conda-${{ hashFiles('requirements/env_hdk.yml') }} | |
- name: Setting up Modin environment | |
uses: conda-incubator/setup-miniconda@v2 | |
with: | |
miniforge-variant: Mambaforge | |
miniforge-version: latest | |
use-mamba: true | |
activate-environment: modin_on_hdk | |
environment-file: requirements/env_hdk.yml | |
python-version: 3.8 | |
# we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed | |
# for more info see https://github.com/conda-incubator/setup-miniconda/issues/264 | |
use-only-tar-bz2: false | |
- name: Conda environment | |
run: | | |
conda info | |
conda list | |
- name: Install HDF5 | |
run: sudo apt update && sudo apt install -y libhdf5-dev | |
- run: python -m pytest modin/test/storage_formats/hdk/test_internals.py | |
- run: python -m pytest modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_init.py | |
- run: python -m pytest modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py | |
- run: python -m pytest modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_utils.py | |
- run: python -m pytest modin/pandas/test/test_io.py --verbose | |
- run: python -m pytest modin/test/interchange/dataframe_protocol/test_general.py | |
- run: python -m pytest modin/test/interchange/dataframe_protocol/hdk | |
- run: python -m pytest modin/experimental/sql/test/test_sql.py | |
- run: python -m pytest modin/pandas/test/test_concat.py | |
- run: python -m pytest modin/pandas/test/dataframe/test_binary.py | |
- run: python -m pytest modin/pandas/test/dataframe/test_reduce.py | |
- run: python -m pytest modin/pandas/test/dataframe/test_join_sort.py | |
- run: python -m pytest modin/pandas/test/test_general.py | |
- run: python -m pytest modin/pandas/test/dataframe/test_indexing.py | |
- run: python -m pytest modin/pandas/test/test_series.py | |
- run: python -m pytest modin/pandas/test/dataframe/test_map_metadata.py | |
- run: python -m pytest modin/pandas/test/dataframe/test_window.py | |
- run: python -m pytest modin/pandas/test/dataframe/test_default.py | |
- run: python examples/docker/modin-hdk/census-hdk.py examples/data/census_1k.csv -no-ml | |
- run: python examples/docker/modin-hdk/nyc-taxi-hdk.py examples/data/nyc-taxi_1k.csv | |
- run: | | |
python examples/docker/modin-hdk/plasticc-hdk.py \ | |
examples/data/plasticc_training_set_1k.csv \ | |
examples/data/plasticc_test_set_1k.csv \ | |
examples/data/plasticc_training_set_metadata_1k.csv \ | |
examples/data/plasticc_test_set_metadata_1k.csv \ | |
-no-ml | |
- uses: ./.github/workflows/upload-coverage | |
test-asv-benchmarks: | |
if: github.event_name == 'pull_request' | |
needs: [lint-flake8, lint-black, lint-mypy, test-api, test-headers] | |
runs-on: ubuntu-latest | |
defaults: | |
run: | |
shell: bash -l {0} | |
env: | |
MODIN_ENGINE: ray | |
MODIN_MEMORY: 1000000000 | |
MODIN_TEST_DATASET_SIZE: small | |
name: test-asv-benchmarks | |
steps: | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 1 | |
- uses: conda-incubator/setup-miniconda@v2 | |
with: | |
auto-activate-base: true | |
activate-environment: "" | |
- name: ASV installation | |
run: | | |
# FIXME: use the tag or release version of ASV as soon as it appears; | |
# The ability to build a conda environment by specifying yml file has not | |
# yet appeared in the release versions; | |
pip install git+https://github.com/airspeed-velocity/asv.git@ef016e233cb9a0b19d517135104f49e0a3c380e9 | |
- name: Running benchmarks | |
run: | | |
# ASV correctly creates environments for testing only from the branch | |
# with `master` name | |
git checkout -b master | |
cd asv_bench | |
asv check -v | |
git remote add upstream https://github.com/modin-project/modin.git | |
git fetch upstream | |
if git diff upstream/master --name-only | grep -q "^asv_bench/"; then | |
asv machine --yes | |
# check Modin on Ray | |
asv run --quick --strict --show-stderr --launch-method=spawn \ | |
-b ^benchmarks -b ^io -b ^scalability | tee benchmarks.log | |
# check pure pandas | |
MODIN_ASV_USE_IMPL=pandas asv run --quick --strict --show-stderr --launch-method=spawn \ | |
-b ^benchmarks -b ^io | tee benchmarks.log | |
# HDK: ERR_OUT_OF_CPU_MEM: Not enough host memory to execute the query (MODIN#4270) | |
# just disable test for testing - it works well in a machine with more memory | |
sed -i 's/def time_groupby_agg_nunique(self, \*args, \*\*kwargs):/# def time_groupby_agg_nunique(self, *args, **kwargs):/g' benchmarks/hdk/benchmarks.py | |
sed -i 's/execute(self.df.groupby(by=self.groupby_columns).agg("nunique"))/# execute(self.df.groupby(by=self.groupby_columns).agg("nunique"))/g' benchmarks/hdk/benchmarks.py | |
# check Modin on HDK | |
MODIN_ENGINE=native MODIN_STORAGE_FORMAT=hdk MODIN_EXPERIMENTAL=true asv run --quick --strict --show-stderr \ | |
--launch-method=forkserver --config asv.conf.hdk.json \ | |
-b ^hdk | tee benchmarks.log | |
else | |
echo "Benchmarks did not run, no changes detected" | |
fi | |
if: always() | |
- name: Publish benchmarks artifact | |
uses: actions/upload-artifact@master | |
with: | |
name: Benchmarks log | |
path: asv_bench/benchmarks.log | |
if: failure() | |
test-all-unidist: | |
needs: [lint-flake8, lint-black, lint-mypy, test-api, test-headers] | |
runs-on: ubuntu-latest | |
defaults: | |
run: | |
shell: bash -l {0} | |
strategy: | |
matrix: | |
python-version: ["3.8"] | |
unidist-backend: ["mpi"] | |
env: | |
MODIN_ENGINE: "Unidist" | |
UNIDIST_BACKEND: ${{matrix.unidist-backend}} | |
# Only test reading from SQL server and postgres on ubuntu for now. | |
# Eventually, we should test on Windows, too, but we will have to set up | |
# the servers differently. | |
MODIN_TEST_READ_FROM_SQL_SERVER: true | |
MODIN_TEST_READ_FROM_POSTGRES: true | |
name: test-ubuntu (engine unidist ${{matrix.unidist-backend}}, python ${{matrix.python-version}}) | |
services: | |
moto: | |
image: motoserver/moto | |
ports: | |
- 5000:5000 | |
env: | |
AWS_ACCESS_KEY_ID: foobar_key | |
AWS_SECRET_ACCESS_KEY: foobar_secret | |
steps: | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 2 | |
- name: Cache conda | |
uses: actions/cache@v3 | |
with: | |
path: | | |
~/conda_pkgs_dir | |
~/.cache/pip | |
key: | |
${{ runner.os }}-conda-${{ hashFiles('requirements/env_unidist.yml') }} | |
- uses: conda-incubator/setup-miniconda@v2 | |
with: | |
miniforge-variant: Mambaforge | |
miniforge-version: latest | |
use-mamba: true | |
activate-environment: modin_on_unidist | |
environment-file: requirements/env_unidist.yml | |
python-version: ${{matrix.python-version}} | |
channel-priority: strict | |
# we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed | |
# for more info see https://github.com/conda-incubator/setup-miniconda/issues/264 | |
use-only-tar-bz2: false | |
- name: Conda environment | |
run: | | |
conda info | |
conda list | |
- name: Install HDF5 | |
run: sudo apt update && sudo apt install -y libhdf5-dev | |
- name: Set up postgres | |
# Locally, specifying port 2345:5432 works, but 2345:2345 and 5432:5432 do not. This solution is from | |
# https://stackoverflow.com/questions/36415654/cant-connect-docker-postgresql-9-3 | |
run: | | |
sudo docker pull postgres | |
sudo docker run --name some-postgres -e POSTGRES_USER=sa -e POSTGRES_PASSWORD=Strong.Pwd-123 -e POSTGRES_DB=postgres -d -p 2345:5432 postgres | |
- run: MODIN_BENCHMARK_MODE=True mpiexec -n 1 python -m pytest modin/pandas/test/internals/test_benchmark_mode.py | |
- run: mpiexec -n 1 python -m pytest modin/pandas/test/internals/test_repartition.py | |
- run: mpiexec -n 1 python -m pytest modin/test/test_partition_api.py | |
- run: mpiexec -n 1 python -m pytest modin/pandas/test/dataframe/test_binary.py | |
- run: mpiexec -n 1 python -m pytest modin/pandas/test/dataframe/test_default.py | |
- run: mpiexec -n 1 python -m pytest modin/pandas/test/dataframe/test_indexing.py | |
- run: mpiexec -n 1 python -m pytest modin/pandas/test/dataframe/test_iter.py | |
- run: mpiexec -n 1 python -m pytest modin/pandas/test/dataframe/test_join_sort.py | |
- run: mpiexec -n 1 python -m pytest modin/pandas/test/dataframe/test_map_metadata.py | |
- run: mpiexec -n 1 python -m pytest modin/pandas/test/dataframe/test_reduce.py | |
- run: mpiexec -n 1 python -m pytest modin/pandas/test/dataframe/test_udf.py | |
- run: mpiexec -n 1 python -m pytest modin/pandas/test/dataframe/test_window.py | |
- run: mpiexec -n 1 python -m pytest modin/pandas/test/dataframe/test_pickle.py | |
- run: mpiexec -n 1 python -m pytest modin/pandas/test/test_series.py | |
- run: mpiexec -n 1 python -m pytest modin/pandas/test/test_rolling.py | |
- run: mpiexec -n 1 python -m pytest modin/pandas/test/test_expanding.py | |
- run: mpiexec -n 1 python -m pytest modin/pandas/test/test_concat.py | |
- run: mpiexec -n 1 python -m pytest modin/pandas/test/test_groupby.py | |
- run: MODIN_EXPERIMENTAL_GROUPBY=1 mpiexec -n 1 python -m pytest modin/pandas/test/test_groupby.py | |
- run: mpiexec -n 1 python -m pytest modin/pandas/test/test_reshape.py | |
- run: mpiexec -n 1 python -m pytest modin/pandas/test/test_general.py | |
- run: mpiexec -n 1 python -m pytest modin/numpy/test/test_array.py | |
- run: mpiexec -n 1 python -m pytest modin/numpy/test/test_array_creation.py | |
- run: mpiexec -n 1 python -m pytest modin/numpy/test/test_array_arithmetic.py | |
- run: mpiexec -n 1 python -m pytest modin/numpy/test/test_array_axis_functions.py | |
- run: mpiexec -n 1 python -m pytest modin/numpy/test/test_array_logic.py | |
- run: mpiexec -n 1 python -m pytest modin/numpy/test/test_array_linalg.py | |
- run: mpiexec -n 1 python -m pytest modin/numpy/test/test_array_indexing.py | |
- run: mpiexec -n 1 python -m pytest modin/numpy/test/test_array_math.py | |
- run: mpiexec -n 1 python -m pytest modin/numpy/test/test_array_shaping.py | |
- run: chmod +x ./.github/workflows/sql_server/set_up_sql_server.sh | |
- run: ./.github/workflows/sql_server/set_up_sql_server.sh | |
# need an extra argument "genv" to set environment variables for mpiexec. We need | |
# these variables to test writing to the mock s3 filesystem. | |
- run: mpiexec -n 1 -genv AWS_ACCESS_KEY_ID foobar_key -genv AWS_SECRET_ACCESS_KEY foobar_secret python -m pytest modin/pandas/test/test_io.py --verbose | |
- run: mpiexec -n 1 python -m pytest modin/experimental/pandas/test/test_io_exp.py | |
- run: pip install "dfsql>=0.4.2" "pyparsing<=2.4.7" && mpiexec -n 1 python -m pytest modin/experimental/sql/test/test_sql.py | |
- run: mpiexec -n 1 python -m pytest modin/test/interchange/dataframe_protocol/test_general.py | |
- run: mpiexec -n 1 python -m pytest modin/test/interchange/dataframe_protocol/pandas/test_protocol.py | |
- run: | | |
python -m pip install lazy_import | |
mpiexec -n 1 python -m pytest modin/pandas/test/integrations/ | |
- uses: ./.github/workflows/upload-coverage | |
test-all: | |
needs: [lint-flake8, lint-black, lint-mypy, test-api, test-headers] | |
runs-on: ubuntu-latest | |
defaults: | |
run: | |
shell: bash -l {0} | |
strategy: | |
matrix: | |
python-version: ["3.8"] | |
engine: ["python", "ray", "dask"] | |
test_task: | |
- group_1 | |
- group_2 | |
- group_3 | |
- group_4 | |
exclude: # python engine only have one task group that contains all the tests | |
- engine: "python" | |
test_task: "group_2" | |
- engine: "python" | |
test_task: "group_3" | |
- engine: "python" | |
test_task: "group_4" | |
env: | |
MODIN_ENGINE: ${{matrix.engine}} | |
# Only test reading from SQL server and postgres on ubuntu for now. | |
# Eventually, we should test on Windows, too, but we will have to set up | |
# the servers differently. | |
MODIN_TEST_READ_FROM_SQL_SERVER: true | |
MODIN_TEST_READ_FROM_POSTGRES: true | |
name: test-ubuntu (engine ${{matrix.engine}}, python ${{matrix.python-version}}, ${{matrix.test_task}}) | |
services: | |
# This service only needs to run for test_task group_4; however, GitHub does not | |
# currently support conditionally running services. This issue: | |
# is open https://github.com/actions/runner/issues/822 - until GitHub implements this feature, | |
# we will just have to run `moto` for all groups. | |
moto: | |
image: motoserver/moto | |
ports: | |
- 5000:5000 | |
env: | |
AWS_ACCESS_KEY_ID: foobar_key | |
AWS_SECRET_ACCESS_KEY: foobar_secret | |
steps: | |
- name: Limit ray memory | |
run: echo "MODIN_MEMORY=1000000000" >> $GITHUB_ENV | |
if: matrix.engine == 'ray' | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 2 | |
- name: Cache conda | |
uses: actions/cache@v3 | |
with: | |
path: | | |
~/conda_pkgs_dir | |
~/.cache/pip | |
key: | |
${{ runner.os }}-conda-${{ hashFiles('environment-dev.yml') }} | |
- uses: conda-incubator/setup-miniconda@v2 | |
with: | |
miniforge-variant: Mambaforge | |
miniforge-version: latest | |
use-mamba: true | |
activate-environment: modin | |
environment-file: environment-dev.yml | |
python-version: ${{matrix.python-version}} | |
channel-priority: strict | |
# we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed | |
# for more info see https://github.com/conda-incubator/setup-miniconda/issues/264 | |
use-only-tar-bz2: false | |
- name: Conda environment | |
run: | | |
conda info | |
conda list | |
- name: Install HDF5 | |
run: sudo apt update && sudo apt install -y libhdf5-dev | |
- name: Set up postgres | |
# Locally, specifying port 2345:5432 works, but 2345:2345 and 5432:5432 do not. This solution is from | |
# https://stackoverflow.com/questions/36415654/cant-connect-docker-postgresql-9-3 | |
run: | | |
sudo docker pull postgres | |
sudo docker run --name some-postgres -e POSTGRES_USER=sa -e POSTGRES_PASSWORD=Strong.Pwd-123 -e POSTGRES_DB=postgres -d -p 2345:5432 postgres | |
      # The test suite is split into four groups so it can be sharded across
      # matrix jobs; the 'python' engine is small enough to run everything in
      # one job, hence the `engine == 'python' || test_task == 'group_N'` guards.
      - run: MODIN_BENCHMARK_MODE=True python -m pytest modin/pandas/test/internals/test_benchmark_mode.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_1'
      - run: python -m pytest modin/pandas/test/internals/test_repartition.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_1'
      - run: python -m pytest modin/test/test_partition_api.py
        if: matrix.engine != 'python' && matrix.test_task == 'group_1'
      - run: python -m pytest -n 2 modin/experimental/xgboost/test/test_default.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_1'
      # Full xgboost tests only run on the Ray engine.
      - run: python -m pytest -n 2 modin/experimental/xgboost/test/test_xgboost.py
        if: matrix.engine == 'ray' && matrix.test_task == 'group_1'
      - run: python -m pytest -n 2 modin/experimental/xgboost/test/test_dmatrix.py
        if: matrix.engine == 'ray' && matrix.test_task == 'group_1'
      - run: python -m pytest -n 2 modin/experimental/batch/test/test_pipeline.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_1'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_binary.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_1'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_default.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_1'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_indexing.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_1'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_iter.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_2'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_join_sort.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_2'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_reduce.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_2'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_udf.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_2'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_window.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_2'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_pickle.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_2'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_map_metadata.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_3'
      - run: python -m pytest -n 2 modin/pandas/test/test_series.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_3'
      # groupby runs twice: once with the experimental implementation (here)
      # and once with the default one (in group_4 below).
      - run: MODIN_EXPERIMENTAL_GROUPBY=1 python -m pytest -n 2 modin/pandas/test/test_groupby.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_3'
      - run: python -m pytest -n 2 modin/pandas/test/test_rolling.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/pandas/test/test_expanding.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest modin/pandas/test/test_concat.py  # Ray and Dask versions fail with -n 2
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_creation.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_arithmetic.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_axis_functions.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_logic.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_linalg.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_indexing.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_math.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_shaping.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/pandas/test/test_groupby.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/pandas/test/test_reshape.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/pandas/test/test_general.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      # SQL Server is only needed by the I/O tests in group_4.
      - run: chmod +x ./.github/workflows/sql_server/set_up_sql_server.sh
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: ./.github/workflows/sql_server/set_up_sql_server.sh
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      # Do not add parallelism (`-n` argument) here - it will cause mock S3 service to fail.
      - run: python -m pytest modin/pandas/test/test_io.py --verbose
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest modin/experimental/pandas/test/test_io_exp.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: pip install "dfsql>=0.4.2" "pyparsing<=2.4.7" && python -m pytest modin/experimental/sql/test/test_sql.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest modin/test/interchange/dataframe_protocol/test_general.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: python -m pytest modin/test/interchange/dataframe_protocol/pandas/test_protocol.py
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - run: |
          python -m pip install lazy_import
          python -m pytest modin/pandas/test/integrations/
        if: matrix.engine == 'python' || matrix.test_task == 'group_4'
      - uses: ./.github/workflows/upload-coverage
test-experimental: | |
needs: [lint-flake8, lint-black, lint-mypy, test-api, test-headers] | |
runs-on: ubuntu-latest | |
defaults: | |
run: | |
shell: bash -l {0} | |
env: | |
MODIN_ENGINE: "python" | |
MODIN_EXPERIMENTAL: "True" | |
name: test experimental | |
services: | |
moto: | |
image: motoserver/moto | |
ports: | |
- 5000:5000 | |
env: | |
AWS_ACCESS_KEY_ID: foobar_key | |
AWS_SECRET_ACCESS_KEY: foobar_secret | |
steps: | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 2 | |
- name: Cache conda | |
uses: actions/cache@v3 | |
with: | |
path: | | |
~/conda_pkgs_dir | |
~/.cache/pip | |
key: | |
${{ runner.os }}-conda-${{ hashFiles('environment-dev.yml') }} | |
- uses: conda-incubator/setup-miniconda@v2 | |
with: | |
miniforge-variant: Mambaforge | |
miniforge-version: latest | |
use-mamba: true | |
activate-environment: modin | |
environment-file: environment-dev.yml | |
python-version: 3.8 | |
channel-priority: strict | |
# we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed | |
# for more info see https://github.com/conda-incubator/setup-miniconda/issues/264 | |
use-only-tar-bz2: false | |
- name: Conda environment | |
run: | | |
conda info | |
conda list | |
- run: python -m pytest -n 2 modin/pandas/test/dataframe/test_map_metadata.py | |
- run: python -m pytest -n 2 modin/pandas/test/test_series.py | |
# Do not add parallelism (`-n` argument) here - it will cause mock S3 service to fail. | |
- run: python -m pytest modin/pandas/test/test_io.py --verbose | |
- uses: ./.github/workflows/upload-coverage | |
test-cloud: | |
needs: [lint-flake8, lint-black, lint-mypy, test-api, test-headers] | |
runs-on: ubuntu-latest | |
defaults: | |
run: | |
shell: bash -l {0} | |
env: | |
MODIN_ENGINE: "python" | |
MODIN_EXPERIMENTAL: "True" | |
name: test cloud | |
services: | |
moto: | |
image: motoserver/moto | |
ports: | |
- 5000:5000 | |
env: | |
AWS_ACCESS_KEY_ID: foobar_key | |
AWS_SECRET_ACCESS_KEY: foobar_secret | |
steps: | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 2 | |
- name: Cache conda | |
uses: actions/cache@v3 | |
with: | |
path: | | |
~/conda_pkgs_dir | |
~/.cache/pip | |
key: | |
${{ runner.os }}-conda-${{ hashFiles('environment-dev.yml') }} | |
- uses: conda-incubator/setup-miniconda@v2 | |
with: | |
miniforge-variant: Mambaforge | |
miniforge-version: latest | |
use-mamba: true | |
activate-environment: modin | |
environment-file: environment-dev.yml | |
python-version: 3.8 | |
channel-priority: strict | |
# we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed | |
# for more info see https://github.com/conda-incubator/setup-miniconda/issues/264 | |
use-only-tar-bz2: false | |
- name: Conda environment | |
run: | | |
conda info | |
conda list | |
# TODO(https://github.com/modin-project/modin/issues/4004): Re-add | |
# "python -m pytest --simulate-cloud=normal modin/pandas/test/test_io.py --verbose" | |
# once that test stops crashing. | |
- run: python -m pytest --simulate-cloud=normal modin/pandas/test/dataframe/test_default.py::test_kurt_kurtosis --verbose | |
- # When running without parameters, some of the tests fail | |
run: python -m pytest --simulate-cloud=normal modin/pandas/test/dataframe/test_binary.py::test_math_functions[add-rows-scalar] | |
- uses: ./.github/workflows/upload-coverage | |
  # Sharded Windows run of the same grouped test suite (Ray and Dask engines).
  test-windows:
    needs: [lint-flake8, lint-black, lint-mypy, test-api, test-headers]
    runs-on: windows-latest
    defaults:
      run:
        shell: bash -l {0}
    strategy:
      matrix:
        python-version: ["3.8"]
        engine: ["ray", "dask"]
        test_task:
          - group_1
          - group_2
          - group_3
          - group_4
    env:
      MODIN_ENGINE: ${{matrix.engine}}
    name: test-windows (engine ${{matrix.engine}}, python ${{matrix.python-version}}, ${{matrix.test_task}})
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 2
      - uses: conda-incubator/setup-miniconda@v2
        with:
          miniforge-variant: Mambaforge
          miniforge-version: latest
          use-mamba: true
          activate-environment: modin
          environment-file: environment-dev.yml
          python-version: ${{matrix.python-version}}
          channel-priority: strict
          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
          conda list
      - name: Start local ray cluster
        # Try a few times to start ray to work around
        # https://github.com/modin-project/modin/issues/4562
        uses: nick-fields/retry@v2
        with:
          timeout_minutes: 5
          max_attempts: 5
          command: |
            ray start --head --port=6379 --object-store-memory=1000000000
        if: matrix.engine == 'ray'
      - name: Tell Modin to use existing ray cluster
        run: echo "MODIN_RAY_CLUSTER=True" >> $GITHUB_ENV
        if: matrix.engine == 'ray'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_binary.py
        if: matrix.test_task == 'group_1'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_default.py
        if: matrix.test_task == 'group_1'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_indexing.py
        if: matrix.test_task == 'group_1'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_iter.py
        if: matrix.test_task == 'group_1'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_join_sort.py
        if: matrix.test_task == 'group_2'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_reduce.py
        if: matrix.test_task == 'group_2'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_udf.py
        if: matrix.test_task == 'group_2'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_window.py
        if: matrix.test_task == 'group_2'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_pickle.py
        if: matrix.test_task == 'group_2'
      - run: python -m pytest -n 2 modin/pandas/test/test_series.py
        if: matrix.test_task == 'group_3'
      - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_map_metadata.py
        if: matrix.test_task == 'group_3'
      # groupby runs twice: experimental implementation here, default in group_4.
      - run: MODIN_EXPERIMENTAL_GROUPBY=1 python -m pytest -n 2 modin/pandas/test/test_groupby.py
        if: matrix.test_task == 'group_3'
      - run: python -m pytest -n 2 modin/pandas/test/test_rolling.py
        if: matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/pandas/test/test_expanding.py
        if: matrix.test_task == 'group_4'
      - run: python -m pytest modin/pandas/test/test_concat.py  # Ray and Dask versions fail with -n 2
        if: matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array.py
        if: matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_creation.py
        if: matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_arithmetic.py
        if: matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_axis_functions.py
        if: matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_logic.py
        if: matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_linalg.py
        if: matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_indexing.py
        if: matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_math.py
        if: matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/numpy/test/test_array_shaping.py
        if: matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/pandas/test/test_groupby.py
        if: matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/pandas/test/test_reshape.py
        if: matrix.test_task == 'group_4'
      - run: python -m pytest -n 2 modin/pandas/test/test_general.py
        if: matrix.test_task == 'group_4'
      - timeout-minutes: 60
        run: python -m pytest modin/pandas/test/test_io.py --verbose
        if: matrix.test_task == 'group_4'
      - uses: ./.github/workflows/upload-coverage
      - name: Stop local ray cluster
        run: ray stop
        if: matrix.engine == 'ray'
      - name: Rename the folder with conda packages so it won't be deleted, it's too slow on Windows.
        run: mv "${CONDA_PKGS_DIR}" "${CONDA_PKGS_DIR}_do_not_cache"
test-pyarrow: | |
needs: [lint-flake8, lint-black, lint-mypy, test-api, test-headers] | |
runs-on: ubuntu-latest | |
defaults: | |
run: | |
shell: bash -l {0} | |
strategy: | |
matrix: | |
python-version: ["3.8"] | |
env: | |
MODIN_STORAGE_FORMAT: pyarrow | |
MODIN_EXPERIMENTAL: "True" | |
name: test (pyarrow, python ${{matrix.python-version}}) | |
services: | |
moto: | |
image: motoserver/moto | |
ports: | |
- 5000:5000 | |
env: | |
AWS_ACCESS_KEY_ID: foobar_key | |
AWS_SECRET_ACCESS_KEY: foobar_secret | |
steps: | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 1 | |
- name: Cache conda | |
uses: actions/cache@v3 | |
with: | |
path: | | |
~/conda_pkgs_dir | |
~/.cache/pip | |
key: | |
${{ runner.os }}-conda-${{ hashFiles('environment-dev.yml') }} | |
- uses: conda-incubator/setup-miniconda@v2 | |
with: | |
miniforge-variant: Mambaforge | |
miniforge-version: latest | |
use-mamba: true | |
activate-environment: modin | |
environment-file: environment-dev.yml | |
python-version: ${{matrix.python-version}} | |
channel-priority: strict | |
# we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed | |
# for more info see https://github.com/conda-incubator/setup-miniconda/issues/264 | |
use-only-tar-bz2: false | |
- name: Conda environment | |
run: | | |
conda info | |
conda list | |
- run: sudo apt update && sudo apt install -y libhdf5-dev | |
- run: python -m pytest modin/pandas/test/test_io.py::TestCsv --verbose | |
  # Runs the experimental spreadsheet-API tests on Ray and Dask.
  test-spreadsheet:
    needs: [lint-flake8, lint-black, lint-mypy, test-api, test-headers]
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}
    strategy:
      matrix:
        python-version: ["3.8"]
        engine: ["ray", "dask"]
    env:
      MODIN_EXPERIMENTAL: "True"
      MODIN_ENGINE: ${{matrix.engine}}
    name: test-spreadsheet (engine ${{matrix.engine}}, python ${{matrix.python-version}})
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 1
      - name: Cache conda
        uses: actions/cache@v3
        with:
          path: |
            ~/conda_pkgs_dir
            ~/.cache/pip
          key:
            ${{ runner.os }}-conda-${{ hashFiles('environment-dev.yml') }}
      - uses: conda-incubator/setup-miniconda@v2
        with:
          miniforge-variant: Mambaforge
          miniforge-version: latest
          use-mamba: true
          activate-environment: modin
          environment-file: environment-dev.yml
          python-version: ${{matrix.python-version}}
          channel-priority: strict
          # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
          # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
          use-only-tar-bz2: false
      - name: Conda environment
        run: |
          conda info
          conda list
      - run: python -m pytest modin/experimental/spreadsheet/test/test_general.py
upload-coverage: | |
needs: [test-internals, test-no-engine, test-defaults, test-hdk, test-all-unidist, test-all, test-experimental, test-cloud, test-windows] | |
runs-on: ubuntu-latest | |
defaults: | |
run: | |
shell: bash -l {0} | |
steps: | |
- uses: actions/checkout@v2 | |
with: | |
fetch-depth: 1 | |
- name: Download coverage data | |
uses: actions/download-artifact@v3.0.2 | |
with: | |
name: coverage-data | |
- uses: actions/setup-python@v4 | |
- run: pip install coverage | |
- name: Combine coverage | |
run: python -m coverage combine | |
- name: Generate coverage report in xml format | |
run: python -m coverage xml | |
- uses: codecov/codecov-action@v3 | |
with: | |
fail_ci_if_error: true |