Skip to content

Commit 56a9862

Browse files
joemarshall, jorisvandenbossche, raulcd, and kou
committed
GH-41910: [Python] Add support for Pyodide (#37822)
pyarrow knows about ARROW_ENABLE_THREADING and doesn't use threads if they are not enabled in libarrow. Split from #37696 * GitHub Issue: #41910 Lead-authored-by: Joe Marshall <joe.marshall@nottingham.ac.uk> Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com> Co-authored-by: Raúl Cumplido <raulcumplido@gmail.com> Co-authored-by: Sutou Kouhei <kou@cozmixng.org> Signed-off-by: Sutou Kouhei <kou@clear-code.com>
1 parent 14e4684 commit 56a9862

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+940
-65
lines changed

.pre-commit-config.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ repos:
3838
# files: (/Dockerfile|\.dockerfile)$
3939
files: >-
4040
(
41+
?^ci/docker/conda-python-emscripten\.dockerfile$|
4142
?^ci/docker/python-wheel-windows-test-vs2019\.dockerfile$|
4243
)
4344
types: []
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Coordinates of the base image. These ARGs are declared before FROM, so they
# are only visible in the FROM line itself.
ARG repo
ARG arch
ARG python="3.12"
FROM ${repo}:${arch}-conda-python-${python}

# Versions of the tools installed below (overridable with --build-arg).
ARG selenium_version="4.15.2"
ARG pyodide_version="0.26.0"
ARG chrome_version="latest"
ARG required_python_min="(3,12)"

# fail early if the python of the base image is older than required_python_min
RUN echo "check PYTHON>=${required_python_min}" && \
    python -c "import sys;sys.exit(0 if sys.version_info>=${required_python_min} else 1)"

# needs to be a login shell so ~/.profile is read; pipefail makes piped
# commands (e.g. "wget | tar" below) fail when the first command fails
SHELL ["/bin/bash", "--login", "-o", "pipefail", "-c"]

# install selenium and pyodide-build
RUN python -m pip install --no-cache-dir selenium==${selenium_version} && \
    python -m pip install --no-cache-dir --upgrade pyodide-build==${pyodide_version}

# install pyodide dist directory to /pyodide
RUN pyodide_dist_url="https://github.com/pyodide/pyodide/releases/download/${pyodide_version}/pyodide-${pyodide_version}.tar.bz2" && \
    wget -q "${pyodide_dist_url}" -O- | tar -xj -C /

# install correct version of emscripten for this pyodide
COPY ci/scripts/install_emscripten.sh /arrow/ci/scripts/
RUN bash /arrow/ci/scripts/install_emscripten.sh ~ /pyodide

# make sure zlib is cached in the EMSDK folder
RUN source ~/emsdk/emsdk_env.sh && embuilder --pic build zlib

# install node 20 (needed for async call support)
# and pthread-stubs for build, and unzip needed for chrome build to work.
# --yes keeps the install non-interactive, and the conda package cache is
# removed in the same layer so that it is not stored in the image.
RUN conda install --yes -c conda-forge make nodejs=20 pthread-stubs unzip && \
    conda clean --all --yes

# install chrome for testing browser based runner
COPY ci/scripts/install_chromedriver.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_chromedriver.sh "${chrome_version}"

# make the version of make that is installed by conda be available everywhere
# or else pyodide's isolated build fails to find it
RUN ln -s "$(type -P make)" /bin/make

ENV ARROW_BUILD_TESTS="OFF" \
    ARROW_BUILD_TYPE="release" \
    ARROW_DEPENDENCY_SOURCE="BUNDLED" \
    ARROW_EMSCRIPTEN="ON"

ci/scripts/cpp_build.sh

+5-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ if [ -x "$(command -v git)" ]; then
3030
fi
3131

3232
# TODO(kszucs): consider to move these to CMake
33-
if [ ! -z "${CONDA_PREFIX}" ]; then
33+
if [ ! -z "${CONDA_PREFIX}" ] && [ "${ARROW_EMSCRIPTEN:-OFF}" = "OFF" ]; then
3434
echo -e "===\n=== Conda environment for build\n==="
3535
conda list
3636

@@ -99,6 +99,10 @@ if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then
9999
fi
100100
n_jobs=2 # Emscripten build fails on docker unless this is set really low
101101
source ~/emsdk/emsdk_env.sh
102+
export CMAKE_INSTALL_PREFIX=$(em-config CACHE)/sysroot
103+
# conda sets LDFLAGS / CFLAGS etc. which break
104+
# emcmake so we unset them
105+
unset LDFLAGS CFLAGS CXXFLAGS CPPFLAGS
102106
emcmake cmake \
103107
--preset=ninja-${ARROW_BUILD_TYPE:-debug}-emscripten \
104108
-DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \

ci/scripts/install_chromedriver.sh

+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Licensed to the Apache Software Foundation (ASF) under one
4+
# or more contributor license agreements. See the NOTICE file
5+
# distributed with this work for additional information
6+
# regarding copyright ownership. The ASF licenses this file
7+
# to you under the Apache License, Version 2.0 (the
8+
# "License"); you may not use this file except in compliance
9+
# with the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing,
14+
# software distributed under the License is distributed on an
15+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
# KIND, either express or implied. See the License for the
17+
# specific language governing permissions and limitations
18+
# under the License.
19+
20+
# Install Chrome and Chromedriver for Selenium
#
# Usage: install_chromedriver.sh [chrome_version]
#   chrome_version: "latest" (also used when the argument is omitted) selects
#                   LATEST_RELEASE_STABLE; any other value is appended to
#                   LATEST_RELEASE_ when querying the chrome-for-testing site.
#
# Note: the download URLs below are hard-coded to the amd64 / linux64 builds.

set -e

chrome_version="${1:-latest}"

# quoted so that an empty argument cannot turn the test into a syntax error
if [ "${chrome_version}" = "latest" ]; then
  latest_release_path=LATEST_RELEASE_STABLE
else
  latest_release_path="LATEST_RELEASE_${chrome_version}"
fi

# resolve the requested version to a full version string
CHROME_VERSION_FULL=$(wget -q --no-verbose -O - "https://googlechromelabs.github.io/chrome-for-testing/${latest_release_path}")
if [ -z "${CHROME_VERSION_FULL}" ]; then
  # without this check the URLs below would be built from an empty version
  echo "Could not determine the Chrome version for '${chrome_version}'" >&2
  exit 1
fi

CHROME_DOWNLOAD_URL="https://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/google-chrome-stable_${CHROME_VERSION_FULL}-1_amd64.deb"
CHROMEDRIVER_DOWNLOAD_URL="https://storage.googleapis.com/chrome-for-testing-public/${CHROME_VERSION_FULL}/linux64/chromedriver-linux64.zip"

# install Chrome from the downloaded package; apt-get is used rather than apt
# because apt warns that its command line interface is not stable for scripts
wget -q --no-verbose -O /tmp/google-chrome.deb "${CHROME_DOWNLOAD_URL}"
apt-get update
apt-get install -qqy /tmp/google-chrome.deb
rm -f /tmp/google-chrome.deb
rm -rf /var/lib/apt/lists/*

# install the matching chromedriver under /opt and expose it on the PATH
wget --no-verbose -O /tmp/chromedriver-linux64.zip "${CHROMEDRIVER_DOWNLOAD_URL}"
unzip /tmp/chromedriver-linux64.zip -d /opt/
rm /tmp/chromedriver-linux64.zip
ln -fs /opt/chromedriver-linux64/chromedriver /usr/local/bin/chromedriver

echo "Using Chrome version: $(google-chrome --version)"
echo "Using Chrome Driver version: $(chromedriver --version)"

ci/scripts/install_emscripten.sh

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Licensed to the Apache Software Foundation (ASF) under one
4+
# or more contributor license agreements. See the NOTICE file
5+
# distributed with this work for additional information
6+
# regarding copyright ownership. The ASF licenses this file
7+
# to you under the Apache License, Version 2.0 (the
8+
# "License"); you may not use this file except in compliance
9+
# with the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing,
14+
# software distributed under the License is distributed on an
15+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
# KIND, either express or implied. See the License for the
17+
# specific language governing permissions and limitations
18+
# under the License.
19+
20+
# install emscripten sdk version to match pyodide in $2 to directory $1/emsdk
#
# Usage: install_emscripten.sh <target_path> <pyodide_path>
#   target_path:  directory in which the emsdk checkout is created (or reused)
#   pyodide_path: directory containing the pyodide "python" executable

set -e

if [ "$#" -lt 2 ]; then
  echo "Usage: $0 <target_path> <pyodide_path>" >&2
  exit 1
fi

target_path=$1
pyodide_path=$2

# ask the python executable in the pyodide directory for the emscripten
# version recorded in sys._emscripten_info, formatted as dot-separated numbers
emscripten_version=$("${pyodide_path}/python" -c "import sys;print(*sys._emscripten_info.emscripten_version,sep='.')")

cd "${target_path}"
# reuse an existing emsdk checkout if there is one
if [ ! -d emsdk ]; then
  git clone https://github.com/emscripten-core/emsdk.git
fi
cd emsdk
./emsdk install "${emscripten_version}"
./emsdk activate "${emscripten_version}"
echo "Installed emsdk to: ${target_path}"

ci/scripts/python_build_emscripten.sh

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Licensed to the Apache Software Foundation (ASF) under one
4+
# or more contributor license agreements. See the NOTICE file
5+
# distributed with this work for additional information
6+
# regarding copyright ownership. The ASF licenses this file
7+
# to you under the Apache License, Version 2.0 (the
8+
# "License"); you may not use this file except in compliance
9+
# with the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing,
14+
# software distributed under the License is distributed on an
15+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
# KIND, either express or implied. See the License for the
17+
# specific language governing permissions and limitations
18+
# under the License.
19+
20+
# Build pyarrow for emscripten with "pyodide build".
#
# Usage: python_build_emscripten.sh <arrow_dir> <build_dir>
#   arrow_dir: arrow source checkout; its python/ directory is built
#   build_dir: build root; the sources are copied to <build_dir>/python

set -ex

# both arguments are mandatory: with an empty build_dir the "rm -rf" below
# would otherwise operate on /python
arrow_dir="${1:?arrow_dir argument is required}"
build_dir="${2:?build_dir argument is required}"

source ~/emsdk/emsdk_env.sh

source_dir="${arrow_dir}/python"
python_build_dir="${build_dir}/python"

# start from a fresh copy of the python sources (symlinks are dereferenced)
rm -rf "${python_build_dir}"
cp -aL "${source_dir}" "${python_build_dir}"

# conda sets LDFLAGS / CFLAGS etc. which break
# emcmake so we unset them
unset LDFLAGS CFLAGS CXXFLAGS CPPFLAGS

pushd "${python_build_dir}"
pyodide build
popd

ci/scripts/python_test_emscripten.sh

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Licensed to the Apache Software Foundation (ASF) under one
4+
# or more contributor license agreements. See the NOTICE file
5+
# distributed with this work for additional information
6+
# regarding copyright ownership. The ASF licenses this file
7+
# to you under the Apache License, Version 2.0 (the
8+
# "License"); you may not use this file except in compliance
9+
# with the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing,
14+
# software distributed under the License is distributed on an
15+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
# KIND, either express or implied. See the License for the
17+
# specific language governing permissions and limitations
18+
# under the License.
19+
20+
# run tests against Chrome and node.js as representative
# WebAssembly platforms (i.e. one browser, one non-browser).
#
# Usage: python_test_emscripten.sh <build_dir> <pyodide_dist_dir>
#   build_dir:        build root; the wheel is looked up in <build_dir>/python/dist
#   pyodide_dist_dir: passed to run_emscripten_tests.py as --dist-dir

set -ex

build_dir="${1:?build_dir argument is required}/python"
pyodide_dist_dir="${2:?pyodide_dist_dir argument is required}"

cd "${build_dir}"

# note: this uses the newest wheel in dist
pyodide_wheel=$(ls -t dist/pyarrow*.whl | head -1)
# "ls | head" succeeds even when ls finds nothing, so check the result here
if [ -z "${pyodide_wheel}" ]; then
  echo "No pyarrow wheel found in ${build_dir}/dist" >&2
  exit 1
fi

echo "-------------- Running emscripten tests in Node ----------------------"
python scripts/run_emscripten_tests.py "${pyodide_wheel}" --dist-dir="${pyodide_dist_dir}" --runtime=node

echo "-------------- Running emscripten tests in Chrome --------------------"
python scripts/run_emscripten_tests.py "${pyodide_wheel}" --dist-dir="${pyodide_dist_dir}" --runtime=chrome

cpp/CMakePresets.json

+2
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,15 @@
5353
"ARROW_ACERO": "ON",
5454
"ARROW_BUILD_SHARED": "OFF",
5555
"ARROW_BUILD_STATIC": "ON",
56+
"ARROW_CSV": "ON",
5657
"ARROW_CUDA": "OFF",
5758
"ARROW_DEPENDENCY_SOURCE": "BUNDLED",
5859
"ARROW_DEPENDENCY_USE_SHARED": "OFF",
5960
"ARROW_ENABLE_THREADING": "OFF",
6061
"ARROW_FLIGHT": "OFF",
6162
"ARROW_IPC": "ON",
6263
"ARROW_JEMALLOC": "OFF",
64+
"ARROW_JSON": "ON",
6365
"ARROW_MIMALLOC": "OFF",
6466
"ARROW_ORC": "ON",
6567
"ARROW_RUNTIME_SIMD_LEVEL": "NONE",

cpp/cmake_modules/ThirdpartyToolchain.cmake

+1
Original file line numberDiff line numberDiff line change
@@ -4551,6 +4551,7 @@ macro(build_orc)
45514551
BUILD_BYPRODUCTS ${ORC_STATIC_LIB}
45524552
CMAKE_ARGS ${ORC_CMAKE_ARGS}
45534553
DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF}
4554+
${ARROW_PROTOBUF_PROTOC}
45544555
${ARROW_ZSTD_LIBZSTD}
45554556
${Snappy_TARGET}
45564557
LZ4::lz4

dev/tasks/tasks.yml

+9
Original file line numberDiff line numberDiff line change
@@ -1194,6 +1194,15 @@ tasks:
11941194
image: conda-python
11951195
{% endfor %}
11961196

1197+
# Runs the conda-python-emscripten docker image on GitHub.
test-conda-python-emscripten:
  ci: github
  template: docker-tests/github.linux.yml
  params:
    env:
      # quoted so that YAML keeps the versions as strings instead of floats
      UBUNTU: "22.04"
      PYTHON: "3.12"
    image: conda-python-emscripten
1205+
11971206
test-conda-python-3.11-hypothesis:
11981207
ci: github
11991208
template: docker-tests/github.linux.yml

docker-compose.yml

+33
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ x-hierarchy:
121121
- conda-python-docs
122122
- conda-python-cython2
123123
- conda-python-dask
124+
- conda-python-emscripten
124125
- conda-python-hdfs
125126
- conda-python-java-integration
126127
- conda-python-jpype
@@ -875,6 +876,38 @@ services:
875876
/arrow/ci/scripts/python_build.sh /arrow /build &&
876877
/arrow/ci/scripts/python_test.sh /arrow"]
877878

879+
conda-python-emscripten:
  # Builds libarrow and pyarrow for emscripten / pyodide and runs the
  # pyarrow tests in node and Chrome.
  #
  # Usage:
  #   docker-compose build conda-python-emscripten
  #   docker-compose run --rm conda-python-emscripten
  # Parameters:
  #   ARCH: amd64 (ci/scripts/install_chromedriver.sh downloads amd64 builds)
  #   UBUNTU: 22.04
  #   PYTHON: 3.12 (the image build fails below required_python_min)
  image: ${REPO}:${ARCH}-conda-python-emscripten
  build:
    context: .
    dockerfile: ci/docker/conda-python-emscripten.dockerfile
    cache_from:
      - ${REPO}:${ARCH}-conda-python-${PYTHON}
    # only build args that the dockerfile declares are passed
    args:
      repo: ${REPO}
      arch: ${ARCH}
      pyodide_version: "0.26.0"
      chrome_version: "122"
      selenium_version: "4.15.2"
      required_python_min: "(3,12)"
      python: ${PYTHON}
  shm_size: *shm-size
  volumes: *ubuntu-volumes
  environment:
    <<: [*common, *ccache, *sccache, *cpp]
  command: ["
    /arrow/ci/scripts/cpp_build.sh /arrow /build &&
    /arrow/ci/scripts/python_build_emscripten.sh /arrow /build &&
    /arrow/ci/scripts/python_test_emscripten.sh /build /pyodide"]
910+
878911
ubuntu-cuda-python:
879912
# Usage:
880913
# docker-compose build cuda-cpp

0 commit comments

Comments
 (0)