FIX-#1851: Squash multiple LogicalProject nodes #285

Workflow file for this run

	# Workflow-level settings: display name, triggers, concurrency policy and the
	# environment shared by every job.
	name: ci
	on:
	  pull_request:
	    paths:
	      # NOTE: keep these paths in sync with the paths that trigger the
	      # fuzzydata Github Actions in .github/workflows/fuzzydata-test.yml
	      - .github/workflows/**
	      - .github/actions/**
	      - '!.github/workflows/push-to-master.yml'
	      - asv_bench/**
	      - modin/**
	      - requirements/**
	      - scripts/**
	      - environment-dev.yml
	      - requirements-dev.txt
	      - setup.cfg
	      - setup.py
	      - versioneer.py
	  # No filters are given for pushes.
	  push:
	concurrency:
	  # Cancel other jobs in the same branch. We don't care whether CI passes
	  # on old commits.
	  group: ${{ github.workflow }}-${{ github.ref }}
	  # Evaluates to true only for refs that start with 'refs/pull/'.
	  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
	env:
	  MODIN_GITHUB_CI: true
	jobs:
	# Job: run black in check-only mode and print a diff of what it would change.
	lint-black:
	  name: lint (black)
	  runs-on: ubuntu-latest
	  steps:
	    - uses: actions/checkout@v3
	    - uses: ./.github/actions/python-only
	    - run: pip install black
	    # NOTE: keep the black command here in sync with the pre-commit hook in
	    # /contributing/pre-commit
	    - run: black --check --diff modin/ asv_bench/benchmarks scripts/doc_checker.py

	# Job: install the dev requirements and run mypy with the repository's mypy.ini.
	lint-mypy:
	  name: lint (mypy)
	  runs-on: ubuntu-latest
	  steps:
	    - uses: actions/checkout@v3
	    - uses: ./.github/actions/python-only
	    - run: pip install -r requirements-dev.txt
	    - run: mypy --config-file mypy.ini

	# Job: run scripts/doc_checker.py over an explicit list of modules and
	# packages. Only runs for pull_request events.
	lint-pydocstyle:
	  if: github.event_name == 'pull_request'
	  name: lint (pydocstyle)
	  runs-on: ubuntu-latest
	  steps:
	    - uses: actions/checkout@v3
	    - uses: ./.github/actions/python-only
	    # The `numpydoc` version here MUST match the versions in the dev requirements files.
	    - run: pip install pytest pytest-cov pydocstyle numpydoc==1.1.0 xgboost
	    # Test the checker's own test suite before using it on the code base.
	    - run: python -m pytest scripts/test
	    - run: pip install -e ".[all]"
	    # Relaxed pass: numpydoc checks disabled and D101/D102/D103/D105 ignored.
	    - run: |
	        python scripts/doc_checker.py --add-ignore=D101,D102,D103,D105 --disable-numpydoc \
	          modin/pandas/dataframe.py modin/pandas/series.py \
	          modin/pandas/groupby.py \
	          modin/pandas/series_utils.py modin/pandas/general.py \
	          modin/pandas/plotting.py modin/pandas/utils.py \
	          modin/pandas/iterator.py modin/pandas/indexing.py
	    - run: python scripts/doc_checker.py modin/core/dataframe
	    - run: python scripts/doc_checker.py modin/core/execution/dask
	    # Pass with the checker's default options over pandas API, benchmarks, io and config modules.
	    - run: |
	        python scripts/doc_checker.py \
	          modin/pandas/accessor.py modin/pandas/general.py \
	          modin/pandas/groupby.py modin/pandas/indexing.py \
	          modin/pandas/iterator.py modin/pandas/plotting.py \
	          modin/pandas/series_utils.py modin/pandas/utils.py \
	          modin/pandas/base.py \
	          modin/pandas/io.py \
	          asv_bench/benchmarks/utils \
	          asv_bench/benchmarks/__init__.py asv_bench/benchmarks/io/__init__.py \
	          asv_bench/benchmarks/scalability/__init__.py \
	          modin/core/io \
	          modin/experimental/core/execution/ray/implementations/pandas_on_ray \
	          modin/experimental/core/execution/ray/implementations/pyarrow_on_ray \
	          modin/pandas/series.py \
	          modin/core/execution/python \
	          modin/pandas/dataframe.py \
	          modin/config/__init__.py \
	          modin/config/__main__.py \
	          modin/config/envvars.py \
	          modin/config/pubsub.py
	    - run: python scripts/doc_checker.py modin/distributed
	    - run: python scripts/doc_checker.py modin/utils.py
	    - run: python scripts/doc_checker.py modin/experimental/sklearn
	    - run: |
	        python scripts/doc_checker.py modin/experimental/xgboost/__init__.py \
	          modin/experimental/xgboost/utils.py modin/experimental/xgboost/xgboost.py \
	          modin/experimental/xgboost/xgboost_ray.py
	    - run: python scripts/doc_checker.py modin/core/execution/ray
	    - run: |
	        python scripts/doc_checker.py modin/core/execution/dispatching/factories/factories.py \
	          modin/core/execution/dispatching/factories/dispatcher.py
	    # The checker is also run on its own source.
	    - run: python scripts/doc_checker.py scripts/doc_checker.py
	    - run: |
	        python scripts/doc_checker.py modin/experimental/pandas/io.py \
	          modin/experimental/pandas/numpy_wrap.py modin/experimental/pandas/__init__.py
	    - run: python scripts/doc_checker.py modin/core/storage_formats/base
	    - run: python scripts/doc_checker.py modin/experimental/core/storage_formats/pyarrow
	    - run: python scripts/doc_checker.py modin/core/storage_formats/pandas
	    - run: |
	        python scripts/doc_checker.py \
	          modin/experimental/core/execution/native/implementations/hdk_on_native/dataframe \
	          modin/experimental/core/execution/native/implementations/hdk_on_native/io \
	          modin/experimental/core/execution/native/implementations/hdk_on_native/partitioning \
	          modin/experimental/core/execution/native/implementations/hdk_on_native/calcite_algebra.py \
	          modin/experimental/core/execution/native/implementations/hdk_on_native/calcite_builder.py \
	          modin/experimental/core/execution/native/implementations/hdk_on_native/calcite_serializer.py \
	          modin/experimental/core/execution/native/implementations/hdk_on_native/df_algebra.py \
	          modin/experimental/core/execution/native/implementations/hdk_on_native/expr.py \
	          modin/experimental/core/execution/native/implementations/hdk_on_native/hdk_worker.py
	    - run: python scripts/doc_checker.py modin/experimental/core/storage_formats/hdk
	    - run: python scripts/doc_checker.py modin/experimental/core/execution/native/implementations/hdk_on_native/interchange/dataframe_protocol
	    - run: python scripts/doc_checker.py modin/experimental/batch/pipeline.py
	    - run: python scripts/doc_checker.py modin/logging

	# Job: run flake8 (with the print and no-implicit-concat plugins) over the
	# package, the benchmarks and the doc checker script.
	lint-flake8:
	  name: lint (flake8)
	  runs-on: ubuntu-latest
	  steps:
	    - uses: actions/checkout@v3
	    - uses: ./.github/actions/python-only
	    # NOTE: If you are changing the set of packages installed here, make sure that
	    # the dev requirements match them.
	    - run: pip install flake8 flake8-print flake8-no-implicit-concat
	    # NOTE: keep the flake8 command here in sync with the pre-commit hook in
	    # /contributing/pre-commit
	    - run: flake8 modin/ asv_bench/benchmarks scripts/doc_checker.py

	# Job: API, executions-API, header and dispatcher tests in an environment
	# built from requirements/requirements-no-engine.yml. Every test job that
	# declares `needs` lists this job together with the lint jobs.
	test-api-and-no-engine:
	  name: Test API, headers and no-engine mode
	  runs-on: ubuntu-latest
	  defaults:
	    run:
	      shell: bash -l {0}
	  steps:
	    - uses: actions/checkout@v3
	    - uses: ./.github/actions/mamba-env
	      with:
	        environment-file: requirements/requirements-no-engine.yml
	    - run: python -m pytest modin/pandas/test/test_api.py
	    - run: python -m pytest modin/test/test_executions_api.py
	    - run: python -m pytest modin/test/test_headers.py
	    # Only the single `test_add_option` test of the dispatcher suite is selected here.
	    - run: python -m pytest modin/core/execution/dispatching/factories/test/test_dispatcher.py::test_add_option
	    - uses: ./.github/actions/upload-coverage

	# Job: install the package with the "all" extra via pip on Ubuntu and Windows
	# and import it once per engine (dask, ray, unidist on MPI).
	test-clean-install:
	  needs: [lint-flake8, lint-black, lint-mypy, test-api-and-no-engine]
	  strategy:
	    matrix:
	      os:
	        - ubuntu
	        - windows
	  runs-on: ${{ matrix.os }}-latest
	  defaults:
	    run:
	      shell: bash -l {0}
	  name: test-clean-install-${{ matrix.os }}
	  steps:
	    - uses: actions/checkout@v3
	    - uses: ./.github/actions/python-only
	    - run: python -m pip install -e ".[all]"
	    # NOTE(review): the custom `bash -l {0}` shell carries no `-e` flag, so a
	    # failure of the first two commands may not fail this step -- confirm.
	    - name: Ensure all engines start up
	      run: |
	        MODIN_ENGINE=dask python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))"
	        MODIN_ENGINE=ray python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))"
	        MODIN_ENGINE=unidist UNIDIST_BACKEND=mpi mpiexec -n 1 python -c "import modin.pandas as pd; print(pd.DataFrame([1,2,3]))"

	# Job: dispatcher, cloud, config, env-var, storage-format, utility,
	# interchange-protocol and logging test suites, run in the environment-dev.yml
	# environment.
	test-internals:
	  needs: [lint-flake8, lint-black, lint-mypy, test-api-and-no-engine]
	  runs-on: ubuntu-latest
	  defaults:
	    run:
	      shell: bash -l {0}
	  name: test-internals
	  steps:
	    - uses: actions/checkout@v3
	    - uses: ./.github/actions/mamba-env
	      with:
	        environment-file: environment-dev.yml
	    - name: Internals tests
	      run: python -m pytest modin/core/execution/dispatching/factories/test/test_dispatcher.py modin/experimental/cloud/test/test_cloud.py
	    - run: python -m pytest modin/config/test
	    - run: python -m pytest modin/test/test_envvar_catcher.py
	    - run: python -m pytest modin/test/storage_formats/base/test_internals.py
	    - run: python -m pytest modin/test/storage_formats/pandas/test_internals.py
	    - run: python -m pytest modin/test/test_envvar_npartitions.py
	    - run: python -m pytest modin/test/test_utils.py
	    - run: python -m pytest asv_bench/test/test_utils.py
	    - run: python -m pytest modin/test/interchange/dataframe_protocol/base
	    - run: python -m pytest modin/test/test_logging.py
	    - uses: ./.github/actions/upload-coverage

	# Job: run the xgboost default tests, the base storage-format internals tests
	# and the core test action with `--execution` set to each matrix entry
	# (currently only BaseOnPython), using the "small" test dataset size.
	test-defaults:
	  needs: [lint-flake8, lint-black, lint-mypy, test-api-and-no-engine]
	  runs-on: ubuntu-latest
	  defaults:
	    run:
	      shell: bash -l {0}
	  strategy:
	    matrix:
	      execution: [BaseOnPython]
	  env:
	    MODIN_TEST_DATASET_SIZE: "small"
	  name: Test ${{ matrix.execution }} execution, Python 3.8
	  steps:
	    - uses: actions/checkout@v3
	    - uses: ./.github/actions/mamba-env
	      with:
	        environment-file: environment-dev.yml
	    - name: Install HDF5
	      run: sudo apt update && sudo apt install -y libhdf5-dev
	    - run: python -m pytest modin/experimental/xgboost/test/test_default.py --execution=${{ matrix.execution }}
	    - run: python -m pytest -n 2 modin/test/storage_formats/base/test_internals.py --execution=${{ matrix.execution }}
	    # The core test action receives the pytest invocation it should use.
	    - uses: ./.github/actions/run-core-tests
	      with:
	        runner: python -m pytest --execution=${{ matrix.execution }}
	    - uses: ./.github/actions/upload-coverage

	# Job: run selected test suites and the HDK example scripts with the
	# experimental native engine and the HDK storage format. A moto container is
	# exposed on port 5000 for the tests.
	test-hdk:
	  needs: [lint-flake8, lint-black, lint-mypy, test-api-and-no-engine]
	  runs-on: ubuntu-latest
	  defaults:
	    run:
	      shell: bash -l {0}
	  env:
	    MODIN_EXPERIMENTAL: "True"
	    MODIN_ENGINE: "native"
	    MODIN_STORAGE_FORMAT: "hdk"
	  name: Test HDK storage format, Python 3.8
	  services:
	    moto:
	      image: motoserver/moto
	      ports:
	        - "5000:5000"
	      env:
	        AWS_ACCESS_KEY_ID: foobar_key
	        AWS_SECRET_ACCESS_KEY: foobar_secret
	  steps:
	    - uses: actions/checkout@v3
	    - uses: ./.github/actions/mamba-env
	      with:
	        environment-file: requirements/env_hdk.yml
	        activate-environment: modin_on_hdk
	    - name: Install HDF5
	      run: sudo apt update && sudo apt install -y libhdf5-dev
	    - run: python -m pytest modin/test/storage_formats/hdk/test_internals.py
	    - run: python -m pytest modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_init.py
	    - run: python -m pytest modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_dataframe.py
	    - run: python -m pytest modin/experimental/core/execution/native/implementations/hdk_on_native/test/test_utils.py
	    - run: python -m pytest modin/pandas/test/test_io.py --verbose
	    - run: python -m pytest modin/test/interchange/dataframe_protocol/test_general.py
	    - run: python -m pytest modin/test/interchange/dataframe_protocol/hdk
	    - run: python -m pytest modin/experimental/sql/test/test_sql.py
	    - run: python -m pytest modin/pandas/test/test_concat.py
	    - run: python -m pytest modin/pandas/test/dataframe/test_binary.py
	    - run: python -m pytest modin/pandas/test/dataframe/test_reduce.py
	    - run: python -m pytest modin/pandas/test/dataframe/test_join_sort.py
	    - run: python -m pytest modin/pandas/test/test_general.py
	    - run: python -m pytest modin/pandas/test/dataframe/test_indexing.py
	    - run: python -m pytest modin/pandas/test/test_series.py
	    - run: python -m pytest modin/pandas/test/dataframe/test_map_metadata.py
	    - run: python -m pytest modin/pandas/test/dataframe/test_window.py
	    - run: python -m pytest modin/pandas/test/dataframe/test_default.py
	    # Example scripts are run against the 1k-row sample data sets.
	    - run: python examples/docker/modin-hdk/census-hdk.py examples/data/census_1k.csv -no-ml
	    - run: python examples/docker/modin-hdk/nyc-taxi-hdk.py examples/data/nyc-taxi_1k.csv
	    - run: |
	        python examples/docker/modin-hdk/plasticc-hdk.py \
	          examples/data/plasticc_training_set_1k.csv \
	          examples/data/plasticc_test_set_1k.csv \
	          examples/data/plasticc_training_set_metadata_1k.csv \
	          examples/data/plasticc_test_set_metadata_1k.csv \
	          -no-ml
	    - uses: ./.github/actions/upload-coverage

	# Job: on pull requests, run the ASV benchmarks in quick mode when the diff
	# against upstream/master touches asv_bench/. Three configurations are
	# exercised: Modin on Ray, pure pandas, and Modin on HDK. The combined log is
	# published as an artifact when the job fails.
	test-asv-benchmarks:
	  if: github.event_name == 'pull_request'
	  needs: [lint-flake8, lint-black, lint-mypy, test-api-and-no-engine]
	  runs-on: ubuntu-latest
	  defaults:
	    run:
	      shell: bash -l {0}
	  env:
	    MODIN_ENGINE: ray
	    MODIN_MEMORY: "1000000000"
	    MODIN_TEST_DATASET_SIZE: small
	  name: test-asv-benchmarks
	  steps:
	    - uses: actions/checkout@v3
	      with:
	        fetch-depth: 1
	    - uses: conda-incubator/setup-miniconda@v2
	      with:
	        auto-activate-base: true
	        activate-environment: ""
	    - name: ASV installation
	      run: |
	        # FIXME: use the tag or release version of ASV as soon as it appears;
	        # The ability to build a conda environment by specifying yml file has not
	        # yet appeared in the release versions;
	        pip install git+https://github.com/airspeed-velocity/asv.git@ef016e233cb9a0b19d517135104f49e0a3c380e9
	    # NOTE(review): every `asv run` is piped into `tee` and the custom
	    # `bash -l {0}` shell sets neither `-e` nor `pipefail`, so a failing
	    # benchmark run may not fail this step -- confirm and consider
	    # `set -eo pipefail`.
	    - name: Running benchmarks
	      run: |
	        git remote add upstream https://github.com/modin-project/modin.git
	        git fetch upstream
	        if git diff upstream/master --name-only | grep -q "^asv_bench/"; then
	            # ASV correctly creates environments for testing only from the branch
	            # with `master` name
	            git checkout -b master
	            cd asv_bench
	            asv check -v

	            asv machine --yes

	            # check Modin on Ray
	            asv run --quick --strict --show-stderr --launch-method=spawn \
	              -b ^benchmarks -b ^io -b ^scalability | tee benchmarks.log

	            # check pure pandas
	            # (append, so the output of the previous run stays in the log)
	            MODIN_ASV_USE_IMPL=pandas asv run --quick --strict --show-stderr --launch-method=spawn \
	              -b ^benchmarks -b ^io | tee -a benchmarks.log

	            # HDK: ERR_OUT_OF_CPU_MEM: Not enough host memory to execute the query (MODIN#4270)
	            # just disable test for testing - it works well in a machine with more memory
	            sed -i 's/def time_groupby_agg_nunique(self, \*args, \*\*kwargs):/# def time_groupby_agg_nunique(self, *args, **kwargs):/g' benchmarks/hdk/benchmarks.py
	            sed -i 's/execute(self.df.groupby(by=self.groupby_columns).agg("nunique"))/# execute(self.df.groupby(by=self.groupby_columns).agg("nunique"))/g' benchmarks/hdk/benchmarks.py

	            # check Modin on HDK
	            MODIN_ENGINE=native MODIN_STORAGE_FORMAT=hdk MODIN_EXPERIMENTAL=true asv run --quick --strict --show-stderr \
	              --launch-method=forkserver --config asv.conf.hdk.json \
	              -b ^hdk | tee -a benchmarks.log
	        else
	            echo "Benchmarks did not run, no changes detected"
	        fi
	      if: always()

	    # Pinned to a major version instead of the moving `master` branch.
	    - name: Publish benchmarks artifact
	      uses: actions/upload-artifact@v3
	      with:
	        name: Benchmarks log
	        path: asv_bench/benchmarks.log
	      if: failure()

	# Job: run the test suites on the Unidist engine with the MPI backend. Every
	# pytest invocation is launched through `mpiexec -n 1`. Postgres is started
	# through docker, SQL Server through a helper script, and a moto container is
	# exposed on port 5000.
	test-all-unidist:
	  needs: [lint-flake8, lint-black, lint-mypy, test-api-and-no-engine]
	  runs-on: ubuntu-latest
	  defaults:
	    run:
	      shell: bash -l {0}
	  strategy:
	    matrix:
	      python-version: ["3.8"]
	      unidist-backend: ["mpi"]
	  env:
	    MODIN_ENGINE: "Unidist"
	    UNIDIST_BACKEND: ${{matrix.unidist-backend}}
	    # Only test reading from SQL server and postgres on ubuntu for now.
	    # Eventually, we should test on Windows, too, but we will have to set up
	    # the servers differently.
	    MODIN_TEST_READ_FROM_SQL_SERVER: true
	    MODIN_TEST_READ_FROM_POSTGRES: true
	  name: test-ubuntu (engine unidist ${{matrix.unidist-backend}}, python ${{matrix.python-version}})
	  services:
	    moto:
	      image: motoserver/moto
	      ports:
	        - "5000:5000"
	      env:
	        AWS_ACCESS_KEY_ID: foobar_key
	        AWS_SECRET_ACCESS_KEY: foobar_secret
	  steps:
	    - uses: actions/checkout@v3
	    - uses: ./.github/actions/mamba-env
	      with:
	        environment-file: requirements/env_unidist.yml
	        activate-environment: modin_on_unidist
	        python-version: ${{matrix.python-version}}
	    - name: Install HDF5
	      run: sudo apt update && sudo apt install -y libhdf5-dev
	    - name: Set up postgres
	      # Locally, specifying port 2345:5432 works, but 2345:2345 and 5432:5432 do not. This solution is from
	      # https://stackoverflow.com/questions/36415654/cant-connect-docker-postgresql-9-3
	      run: |
	        sudo docker pull postgres
	        sudo docker run --name some-postgres -e POSTGRES_USER=sa -e POSTGRES_PASSWORD=Strong.Pwd-123 -e POSTGRES_DB=postgres -d -p 2345:5432 postgres
	    - run: MODIN_BENCHMARK_MODE=True mpiexec -n 1 python -m pytest modin/pandas/test/internals/test_benchmark_mode.py
	    - run: mpiexec -n 1 python -m pytest modin/pandas/test/internals/test_repartition.py
	    - run: mpiexec -n 1 python -m pytest modin/test/test_partition_api.py
	    # The core test action is given the mpiexec-wrapped runner and an empty
	    # `parallel` input.
	    - uses: ./.github/actions/run-core-tests
	      with:
	        runner: mpiexec -n 1 python -m pytest
	        parallel: ""
	    - run: mpiexec -n 1 python -m pytest modin/numpy/test
	    - run: chmod +x ./.github/workflows/sql_server/set_up_sql_server.sh
	    - run: ./.github/workflows/sql_server/set_up_sql_server.sh
	    # need an extra argument "genv" to set environment variables for mpiexec. We need
	    # these variables to test writing to the mock s3 filesystem.
	    - run: mpiexec -n 1 -genv AWS_ACCESS_KEY_ID foobar_key -genv AWS_SECRET_ACCESS_KEY foobar_secret python -m pytest modin/pandas/test/test_io.py --verbose
	    - run: mpiexec -n 1 python -m pytest modin/experimental/pandas/test/test_io_exp.py
	    - run: pip install "dfsql>=0.4.2" "pyparsing<=2.4.7" && mpiexec -n 1 python -m pytest modin/experimental/sql/test/test_sql.py
	    - run: mpiexec -n 1 python -m pytest modin/test/interchange/dataframe_protocol/test_general.py
	    - run: mpiexec -n 1 python -m pytest modin/test/interchange/dataframe_protocol/pandas/test_protocol.py
	    - run: |
	        python -m pip install lazy_import
	        mpiexec -n 1 python -m pytest modin/pandas/test/integrations/
	    - uses: ./.github/actions/upload-coverage

	# Job: main test matrix over OS (ubuntu, windows), engine (python, ray, dask)
	# and four test groups. The python engine keeps only group_1 in the matrix,
	# and its steps are written so that this single job runs every group.
	# Steps are gated with `if:` on the matrix values.
	test-all:
	  needs: [lint-flake8, lint-black, lint-mypy, test-api-and-no-engine]
	  strategy:
	    matrix:
	      os:
	        - ubuntu
	        - windows
	      python-version: ["3.8"]
	      engine: ["python", "ray", "dask"]
	      test_task:
	        - group_1
	        - group_2
	        - group_3
	        - group_4
	      # python engine only have one task group that contains all the tests
	      exclude:
	        - engine: "python"
	          test_task: "group_2"
	        - engine: "python"
	          test_task: "group_3"
	        - engine: "python"
	          test_task: "group_4"
	  runs-on: ${{ matrix.os }}-latest
	  defaults:
	    run:
	      shell: bash -l {0}
	  env:
	    MODIN_ENGINE: ${{matrix.engine}}
	    # Only test reading from SQL server and postgres on ubuntu for now.
	    # Eventually, we should test on Windows, too, but we will have to set up
	    # the servers differently.
	    MODIN_TEST_READ_FROM_SQL_SERVER: ${{ matrix.os == 'ubuntu' }}
	    MODIN_TEST_READ_FROM_POSTGRES: ${{ matrix.os == 'ubuntu' }}
	  name: test-${{ matrix.os }} (engine ${{matrix.engine}}, python ${{matrix.python-version}}, ${{matrix.test_task}})
	  services:
	    # Using workaround https://github.com/actions/runner/issues/822#issuecomment-1524826092
	    moto:
	      # we only need moto service on Ubuntu and for group_4 task or python engine
	      image: ${{ (matrix.os == 'ubuntu' && (matrix.engine == 'python' || matrix.test_task == 'group_4')) && 'motoserver/moto' || '' }}
	      ports:
	        - "5000:5000"
	      env:
	        AWS_ACCESS_KEY_ID: foobar_key
	        AWS_SECRET_ACCESS_KEY: foobar_secret
	  steps:
	    - name: Limit ray memory
	      run: echo "MODIN_MEMORY=1000000000" >> $GITHUB_ENV
	      if: matrix.os == 'ubuntu' && matrix.engine == 'ray'
	    - name: Tell Modin to use existing ray cluster
	      run: echo "MODIN_RAY_CLUSTER=True" >> $GITHUB_ENV
	      if: matrix.os == 'windows' && matrix.engine == 'ray'
	    - uses: actions/checkout@v3
	    - uses: ./.github/actions/mamba-env
	      with:
	        environment-file: environment-dev.yml
	        python-version: ${{matrix.python-version}}
	    - name: Start local ray cluster
	      # Try a few times to start ray to work around
	      # https://github.com/modin-project/modin/issues/4562
	      uses: nick-fields/retry@v2
	      with:
	        timeout_minutes: 5
	        max_attempts: 5
	        command: ray start --head --port=6379 --object-store-memory=1000000000
	      if: matrix.os == 'windows' && matrix.engine == 'ray'
	    - name: Install HDF5
	      run: sudo apt update && sudo apt install -y libhdf5-dev
	      if: matrix.os == 'ubuntu'
	    - name: Set up postgres
	      # Locally, specifying port 2345:5432 works, but 2345:2345 and 5432:5432 do not. This solution is from
	      # https://stackoverflow.com/questions/36415654/cant-connect-docker-postgresql-9-3
	      run: |
	        sudo docker pull postgres
	        sudo docker run --name some-postgres -e POSTGRES_USER=sa -e POSTGRES_PASSWORD=Strong.Pwd-123 -e POSTGRES_DB=postgres -d -p 2345:5432 postgres
	      if: matrix.os == 'ubuntu'

	    # --- group_1 (and the python engine) ---
	    - run: MODIN_BENCHMARK_MODE=True python -m pytest modin/pandas/test/internals/test_benchmark_mode.py
	      if: matrix.engine == 'python' || matrix.test_task == 'group_1'
	    - run: python -m pytest modin/pandas/test/internals/test_repartition.py
	      if: matrix.engine == 'python' || matrix.test_task == 'group_1'
	    - run: python -m pytest modin/test/test_partition_api.py
	      if: matrix.engine != 'python' && matrix.test_task == 'group_1'
	    - run: python -m pytest -n 2 modin/experimental/xgboost/test/test_default.py
	      if: matrix.engine == 'python' || matrix.test_task == 'group_1'
	    - run: python -m pytest -n 2 modin/experimental/xgboost/test/test_xgboost.py
	      if: matrix.os == 'ubuntu' && matrix.engine == 'ray' && matrix.test_task == 'group_1'
	    - run: python -m pytest -n 2 modin/experimental/xgboost/test/test_dmatrix.py
	      if: matrix.engine == 'ray' && matrix.test_task == 'group_1'
	    - run: python -m pytest -n 2 modin/experimental/batch/test/test_pipeline.py
	      if: matrix.engine == 'python' || matrix.test_task == 'group_1'
	    # --- core test groups: exactly one per non-python matrix entry, all four for python ---
	    - uses: ./.github/actions/run-core-tests/group_1
	      if: matrix.engine == 'python' || matrix.test_task == 'group_1'
	    - uses: ./.github/actions/run-core-tests/group_2
	      if: matrix.engine == 'python' || matrix.test_task == 'group_2'
	    - uses: ./.github/actions/run-core-tests/group_3
	      if: matrix.engine == 'python' || matrix.test_task == 'group_3'
	    - uses: ./.github/actions/run-core-tests/group_4
	      if: matrix.engine == 'python' || matrix.test_task == 'group_4'
	    # --- group_4 (and the python engine) ---
	    - run: python -m pytest -n 2 modin/numpy/test
	      if: matrix.engine == 'python' || matrix.test_task == 'group_4'
	    - run: chmod +x ./.github/workflows/sql_server/set_up_sql_server.sh
	      if: matrix.os == 'ubuntu' && (matrix.engine == 'python' || matrix.test_task == 'group_4')
	    - run: ./.github/workflows/sql_server/set_up_sql_server.sh
	      if: matrix.os == 'ubuntu' && (matrix.engine == 'python' || matrix.test_task == 'group_4')
	    # Do not add parallelism (`-n` argument) here - it will cause mock S3 service to fail.
	    - run: python -m pytest modin/pandas/test/test_io.py --verbose
	      timeout-minutes: 60
	      if: matrix.engine == 'python' || matrix.test_task == 'group_4'
	    - run: python -m pytest modin/experimental/pandas/test/test_io_exp.py
	      if: matrix.engine == 'python' || matrix.test_task == 'group_4'
	    - run: pip install "dfsql>=0.4.2" "pyparsing<=2.4.7" && python -m pytest modin/experimental/sql/test/test_sql.py
	      if: matrix.os == 'ubuntu' && (matrix.engine == 'python' || matrix.test_task == 'group_4')
	    - run: python -m pytest modin/test/interchange/dataframe_protocol/test_general.py
	      if: matrix.engine == 'python' || matrix.test_task == 'group_4'
	    - run: python -m pytest modin/test/interchange/dataframe_protocol/pandas/test_protocol.py
	      if: matrix.engine == 'python' || matrix.test_task == 'group_4'
	    - run: |
	        python -m pip install lazy_import
	        python -m pytest modin/pandas/test/integrations/
	      if: matrix.engine == 'python' || matrix.test_task == 'group_4'

	    - uses: ./.github/actions/upload-coverage
	    - name: Stop local ray cluster
	      run: ray stop
	      if: matrix.os == 'windows' && matrix.engine == 'ray'
	    # NOTE(review): no step in this job declares `id: cache-conda`; unless the
	    # id is provided some other way, this condition never holds -- confirm.
	    - name: Rename the folder with conda packages so it won't be deleted, it's too slow on Windows.
	      run: mv "${CONDA_PKGS_DIR}" "${CONDA_PKGS_DIR}_do_not_cache"
	      if: matrix.os == 'windows' && steps.cache-conda.outputs.cache-hit == 'true'

	# Job: run the map-metadata, series and IO test suites on the python engine
	# with MODIN_EXPERIMENTAL enabled. A moto container is exposed on port 5000.
	test-experimental:
	  needs: [lint-flake8, lint-black, lint-mypy, test-api-and-no-engine]
	  runs-on: ubuntu-latest
	  defaults:
	    run:
	      shell: bash -l {0}
	  env:
	    MODIN_ENGINE: "python"
	    MODIN_EXPERIMENTAL: "True"
	  name: test experimental
	  services:
	    moto:
	      image: motoserver/moto
	      ports:
	        - "5000:5000"
	      env:
	        AWS_ACCESS_KEY_ID: foobar_key
	        AWS_SECRET_ACCESS_KEY: foobar_secret
	  steps:
	    - uses: actions/checkout@v3
	    - uses: ./.github/actions/mamba-env
	      with:
	        environment-file: environment-dev.yml
	    - run: python -m pytest -n 2 modin/pandas/test/dataframe/test_map_metadata.py
	    - run: python -m pytest -n 2 modin/pandas/test/test_series.py
	    # Do not add parallelism (`-n` argument) here - it will cause mock S3 service to fail.
	    - run: python -m pytest modin/pandas/test/test_io.py --verbose
	    - uses: ./.github/actions/upload-coverage

	# Job: run two individually selected tests with `--simulate-cloud=normal` on
	# the python engine with MODIN_EXPERIMENTAL enabled.
	test-cloud:
	  needs: [lint-flake8, lint-black, lint-mypy, test-api-and-no-engine]
	  runs-on: ubuntu-latest
	  defaults:
	    run:
	      shell: bash -l {0}
	  env:
	    MODIN_ENGINE: "python"
	    MODIN_EXPERIMENTAL: "True"
	  name: test cloud
	  services:
	    moto:
	      image: motoserver/moto
	      ports:
	        - "5000:5000"
	      env:
	        AWS_ACCESS_KEY_ID: foobar_key
	        AWS_SECRET_ACCESS_KEY: foobar_secret
	  steps:
	    - uses: actions/checkout@v3
	    - uses: ./.github/actions/mamba-env
	      with:
	        environment-file: environment-dev.yml
	    # TODO(https://github.com/modin-project/modin/issues/4004): Re-add
	    # "python -m pytest --simulate-cloud=normal modin/pandas/test/test_io.py --verbose"
	    # once that test stops crashing.
	    - run: python -m pytest --simulate-cloud=normal modin/pandas/test/dataframe/test_default.py::test_kurt_kurtosis --verbose
	    # When running without parameters, some of the tests fail
	    - run: python -m pytest --simulate-cloud=normal modin/pandas/test/dataframe/test_binary.py::test_math_functions[add-rows-scalar]
	    - uses: ./.github/actions/upload-coverage

	# Job: run the `TestCsv` class of the IO tests with the experimental pyarrow
	# storage format. This job has no upload-coverage step.
	test-pyarrow:
	  needs: [lint-flake8, lint-black, lint-mypy, test-api-and-no-engine]
	  runs-on: ubuntu-latest
	  defaults:
	    run:
	      shell: bash -l {0}
	  strategy:
	    matrix:
	      python-version: ["3.8"]
	  env:
	    MODIN_STORAGE_FORMAT: pyarrow
	    MODIN_EXPERIMENTAL: "True"
	  name: test (pyarrow, python ${{matrix.python-version}})
	  services:
	    moto:
	      image: motoserver/moto
	      ports:
	        - "5000:5000"
	      env:
	        AWS_ACCESS_KEY_ID: foobar_key
	        AWS_SECRET_ACCESS_KEY: foobar_secret
	  steps:
	    - uses: actions/checkout@v3
	    - uses: ./.github/actions/mamba-env
	      with:
	        environment-file: environment-dev.yml
	        python-version: ${{matrix.python-version}}
	    - run: sudo apt update && sudo apt install -y libhdf5-dev
	    - run: python -m pytest modin/pandas/test/test_io.py::TestCsv --verbose

	# Job: run the experimental spreadsheet tests once per engine (ray, dask).
	# This job has no upload-coverage step.
	test-spreadsheet:
	  needs: [lint-flake8, lint-black, lint-mypy, test-api-and-no-engine]
	  runs-on: ubuntu-latest
	  defaults:
	    run:
	      shell: bash -l {0}
	  strategy:
	    matrix:
	      python-version: ["3.8"]
	      engine: ["ray", "dask"]
	  env:
	    MODIN_EXPERIMENTAL: "True"
	    MODIN_ENGINE: ${{matrix.engine}}
	  name: test-spreadsheet (engine ${{matrix.engine}}, python ${{matrix.python-version}})
	  steps:
	    - uses: actions/checkout@v3
	    - uses: ./.github/actions/mamba-env
	      with:
	        environment-file: environment-dev.yml
	        python-version: ${{matrix.python-version}}
	    - run: python -m pytest modin/experimental/spreadsheet/test/test_general.py

	# Job: after the listed test jobs finish, download the `coverage-data`
	# artifact, combine it, generate an XML report and send it to Codecov.
	upload-coverage:
	  needs:
	    - test-internals
	    - test-api-and-no-engine
	    - test-defaults
	    - test-hdk
	    - test-all-unidist
	    - test-all
	    - test-experimental
	    - test-cloud
	  runs-on: ubuntu-latest
	  defaults:
	    run:
	      shell: bash -l {0}
	  steps:
	    - uses: actions/checkout@v3
	    - uses: ./.github/actions/python-only
	    - name: Download coverage data
	      uses: actions/download-artifact@v3.0.2
	      with:
	        name: coverage-data
	    - run: pip install coverage
	    - name: Combine coverage
	      run: python -m coverage combine
	    - name: Generate coverage report in xml format
	      run: python -m coverage xml
	    # An upload error fails the job instead of being ignored.
	    - uses: codecov/codecov-action@v3
	      with:
	        fail_ci_if_error: true

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FIX-#1851: Squash multiple LogicalProject nodes #285

Workflow file

FIX-#1851: Squash multiple LogicalProject nodes #285

Jobs

Run details

Workflow file for this run