Make the NumPy release used by the conda-python-spark image configurable.

A new `numpy` build argument (default "latest") is passed from
dev/tasks/tasks.yml through docker-compose.yml to the Dockerfile, which
uninstalls the conda-provided numpy and installs one with the new
ci/scripts/install_numpy.sh. The Python 3.8 / Spark v3.2.0 job requests
1.23; the other Spark jobs keep "latest".

NOTE(review): `mamba uninstall numpy` may also remove packages that depend
on numpy, such as the pandas installed in the same RUN step; confirm that
pandas is still present in the built image.

diff --git a/ci/docker/conda-python-spark.dockerfile b/ci/docker/conda-python-spark.dockerfile
--- a/ci/docker/conda-python-spark.dockerfile
+++ b/ci/docker/conda-python-spark.dockerfile
@@ -23,11 +23,18 @@ FROM ${repo}:${arch}-conda-python-${python}
 ARG jdk=8
 ARG maven=3.5
 
+# NumPy release to install with pip: "latest" or a version to pin
+# (for example 1.23); the value is handed to ci/scripts/install_numpy.sh
+ARG numpy=latest
+COPY ci/scripts/install_numpy.sh /arrow/ci/scripts/
+
 RUN mamba install -q -y \
         openjdk=${jdk} \
         maven=${maven} \
         pandas && \
-    mamba clean --all
+    mamba clean --all && \
+    mamba uninstall -q -y numpy && \
+    /arrow/ci/scripts/install_numpy.sh "${numpy}"
 
 # installing specific version of spark
 ARG spark=master
diff --git a/ci/scripts/install_numpy.sh b/ci/scripts/install_numpy.sh
new file mode 100755
--- /dev/null
+++ b/ci/scripts/install_numpy.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Install NumPy with pip.
+#
+# Usage: install_numpy.sh [numpy_version]
+#
+# numpy_version defaults to "latest", which installs whichever release pip
+# selects; any other value is passed to pip as "numpy==numpy_version".
+
+set -e
+
+if [ $# -gt 1 ]; then
+  echo "Usage: $0 [numpy_version]" >&2
+  exit 1
+fi
+
+numpy=${1:-"latest"}
+
+# --no-cache-dir keeps pip's download cache out of the Docker image layer
+if [ "${numpy}" = "latest" ]; then
+  pip install --no-cache-dir numpy
+else
+  pip install --no-cache-dir "numpy==${numpy}"
+fi
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 4c4302a72f939..b345bcd48e9a2 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -1589,9 +1589,9 @@ tasks:
       image: conda-python-hdfs
 {% endfor %}
 
-{% for python_version, spark_version, test_pyarrow_only in [("3.7", "v3.1.2", "false"),
-                                                            ("3.8", "v3.2.0", "false"),
-                                                            ("3.9", "master", "false")] %}
+{% for python_version, spark_version, test_pyarrow_only, numpy_version in [("3.7", "v3.1.2", "false", "latest"),
+                                                                           ("3.8", "v3.2.0", "false", "1.23"),
+                                                                           ("3.9", "master", "false", "latest")] %}
   test-conda-python-{{ python_version }}-spark-{{ spark_version }}:
     ci: github
     template: docker-tests/github.linux.yml
@@ -1600,6 +1600,7 @@ tasks:
         PYTHON: "{{ python_version }}"
         SPARK: "{{ spark_version }}"
         TEST_PYARROW_ONLY: "{{ test_pyarrow_only }}"
+        NUMPY: "{{ numpy_version }}"
       # use the branch-3.0 of spark, so prevent reusing any layers
       flags: --no-leaf-cache
       image: conda-python-spark
diff --git a/docker-compose.yml b/docker-compose.yml
index 12071a57bd3c7..c9b02c45d1c5d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1788,6 +1788,7 @@ services:
         # be set to ${MAVEN}
         maven: 3.5
         spark: ${SPARK}
+        numpy: ${NUMPY}
     shm_size: *shm-size
     environment:
       <<: *ccache