From 0e204587bb72b7d1a42f025e58b1d221d4932477 Mon Sep 17 00:00:00 2001 From: Junduo Dong Date: Thu, 10 Oct 2024 02:34:26 +0800 Subject: [PATCH] feat(bazel): support building C++ libs on windows platform (#1873) ## What does this PR do? Make the Bazel build work on Windows. 1. Replace the variable-length arrays in `row.cc` with `std::vector` so that the file compiles with MSVC ([VLA is not supported](https://devblogs.microsoft.com/cppblog/c11-and-c17-standard-support-arriving-in-msvc/)) 2. Add MSVC flag [`/Zc:preprocessor`](https://learn.microsoft.com/en-us/cpp/build/reference/zc-preprocessor?view=msvc-170) to enable the standards-conforming (C99/C11) preprocessor 3. Add MSVC flag [`/utf-8`](https://learn.microsoft.com/en-us/cpp/build/reference/utf-8-set-source-and-executable-character-sets-to-utf-8?view=msvc-170) to set source and execution character sets to UTF-8. 4. Pass the arrow and arrow_python interface libraries (`*.lib`) to the linker 5. Add a new CI environment, `windows-2022`, to build the Fury C++ libraries ## Related issues #798 ## Does this PR introduce any user-facing change? - [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change? 
## Benchmark --------- Signed-off-by: Junduo Dong --- .bazelrc | 4 +- .github/workflows/ci.yml | 2 +- bazel/arrow/BUILD.windows.bzl | 62 +++++++++++++++++++++++++++++++ bazel/arrow/pyarrow_configure.bzl | 58 +++++++++++++++++++++-------- ci/run_ci.py | 4 +- cpp/fury/row/row.cc | 4 +- 6 files changed, 114 insertions(+), 20 deletions(-) create mode 100644 bazel/arrow/BUILD.windows.bzl diff --git a/.bazelrc b/.bazelrc index 6eb0a42d2d..2ab6340c8b 100644 --- a/.bazelrc +++ b/.bazelrc @@ -29,6 +29,7 @@ build:linux --force_pic build:macos --force_pic build:clang-cl --compiler=clang-cl build:msvc --compiler=msvc-cl +build:windows --compiler=msvc-cl # test config test --build_tests_only @@ -38,4 +39,5 @@ test --test_output=all build:linux --cxxopt="-std=c++17" --linkopt="-pthread" build:macos --cxxopt="-std=c++17" --linkopt="-pthread" build:clang-cl --cxxopt="-std=c++17" -build:windows --cxxopt="/std:c++17" +build:windows --cxxopt="/std:c++17" --cxxopt="/Zc:preprocessor" --cxxopt="/utf-8" +build:msvc --cxxopt="/std:c++17" --cxxopt="/Zc:preprocessor" --cxxopt="/utf-8" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 002b66e2c0..212fcf106d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -181,7 +181,7 @@ jobs: name: C++ CI strategy: matrix: - os: [ubuntu-latest, macos-12, macos-14] # macos-12: x86, macos-14: arm64 + os: [ubuntu-latest, macos-12, macos-14, windows-2022] # macos-12: x86, macos-14: arm64 runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 diff --git a/bazel/arrow/BUILD.windows.bzl b/bazel/arrow/BUILD.windows.bzl new file mode 100644 index 0000000000..24557fc870 --- /dev/null +++ b/bazel/arrow/BUILD.windows.bzl @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +package(default_visibility = ["//visibility:public"]) + +load("@rules_cc//cc:defs.bzl", "cc_library", "cc_import") + +cc_library( + name = "arrow", + hdrs = [":arrow_header_include"], + includes = ["include"], + deps = [":arrow_shared_library"], + visibility = ["//visibility:public"], +) + +cc_import( + name = "arrow_shared_library", + interface_library = ":libarrow_interface", + shared_library = ":libarrow", + visibility = ["//visibility:public"], +) + +cc_import( + name = "arrow_python_shared_library", + interface_library = ":libarrow_python_interface", + shared_library = ":libarrow_python", + visibility = ["//visibility:public"], +) + +cc_library( + name = "arrow_header_lib", + hdrs = [":arrow_header_include"], + includes = ["include"], + visibility = ["//visibility:public"], +) + +cc_library( + name="python_numpy_headers", + hdrs=[":python_numpy_include"], + includes=["python_numpy_include"], +) + +%{ARROW_HEADER_GENRULE} +%{ARROW_LIBRARY_GENRULE} +%{ARROW_ITF_LIBRARY_GENRULE} +%{ARROW_PYTHON_LIBRARY_GENRULE} +%{ARROW_PYTHON_ITF_LIB_GENRULE} +%{PYTHON_NUMPY_INCLUDE_GENRULE} diff --git a/bazel/arrow/pyarrow_configure.bzl b/bazel/arrow/pyarrow_configure.bzl index c915097e08..15c65a3dd6 100644 --- a/bazel/arrow/pyarrow_configure.bzl +++ b/bazel/arrow/pyarrow_configure.bzl @@ -172,12 +172,12 @@ def _get_pyarrow_include(repository_ctx, python_bin="python3"): error_details=( "Is the Python binary 
path set up right? " + "(See ./configure or " + python_bin + ".) " + "Is distutils installed?")) - return result.stdout.splitlines()[0] + return result.stdout.splitlines()[0].replace('\\', '/') def _get_pyarrow_shared_library(repository_ctx, library_name, python_bin="python3"): """Gets the pyarrow shared library path.""" code = """import pyarrow, os, glob;print(glob.glob(os.path.join(""" +\ - """os.path.dirname(pyarrow.__file__), 'lib{}.*'))[0])""".format(library_name) + """os.path.dirname(pyarrow.__file__), '{}'))[0])""".format(library_name) result = _execute( repository_ctx, [ python_bin, "-c", code @@ -186,7 +186,7 @@ def _get_pyarrow_shared_library(repository_ctx, library_name, python_bin="python error_details=( "Is the Python binary path set up right? " + "(See ./configure or " + python_bin + ".) " + "Is distutils installed?")) - return result.stdout.splitlines()[0] + return result.stdout.splitlines()[0].replace('\\', '/') #python numpy include def _get_python_numpy_include(repository_ctx, python_bin="python3"): @@ -199,10 +199,16 @@ def _get_python_numpy_include(repository_ctx, python_bin="python3"): error_details=( "Is the Python binary path set up right? " + "(See ./configure or " + python_bin + ".) " + "Is distutils installed?")) - return result.stdout.splitlines()[0] + return result.stdout.splitlines()[0].replace('\\', '/') def _pyarrow_pip_impl(repository_ctx): - arrow_header_dir = _get_pyarrow_include(repository_ctx) + python_bin = "python3" + + # python 3.x is usually named as `python` by default on windows. 
+ if _is_windows(repository_ctx): + python_bin = "python" + + arrow_header_dir = _get_pyarrow_include(repository_ctx, python_bin) arrow_header_rule = _symlink_genrule_for_dir( repository_ctx, arrow_header_dir, @@ -210,28 +216,50 @@ def _pyarrow_pip_impl(repository_ctx): "arrow_header_include", ) - arrow_library_path = _get_pyarrow_shared_library(repository_ctx, "arrow") + arrow_library_path = _get_pyarrow_shared_library(repository_ctx, "arrow.dll" if _is_windows(repository_ctx) else "libarrow.*", python_bin) arrow_library = arrow_library_path.rsplit("/",1 )[-1] arrow_library_rule = _symlink_genrule_for_dir( repository_ctx, None, "", "libarrow", [arrow_library_path], [arrow_library]) - arrow_python_library_path = _get_pyarrow_shared_library(repository_ctx, "arrow_python") + arrow_python_library_path = _get_pyarrow_shared_library(repository_ctx, "arrow_python.dll" if _is_windows(repository_ctx) else "libarrow_python.*", python_bin) arrow_python_library = arrow_python_library_path.rsplit("/",1 )[-1] arrow_python_library_rule = _symlink_genrule_for_dir( repository_ctx, None, "", "libarrow_python", [arrow_python_library_path], [arrow_python_library]) - python_numpy_include = _get_python_numpy_include(repository_ctx) + python_numpy_include = _get_python_numpy_include(repository_ctx, python_bin) python_numpy_include_rule = _symlink_genrule_for_dir( repository_ctx, python_numpy_include, 'python_numpy_include', 'python_numpy_include') - build_tpl = repository_ctx.path(Label("//bazel/arrow:BUILD.tpl.bzl")) - repository_ctx.template("BUILD", build_tpl, { - "%{ARROW_HEADER_GENRULE}": arrow_header_rule, - "%{ARROW_LIBRARY_GENRULE}": arrow_library_rule, - "%{ARROW_PYTHON_LIBRARY_GENRULE}": arrow_python_library_rule, - "%{PYTHON_NUMPY_INCLUDE_GENRULE}": python_numpy_include_rule, - }) + if _is_windows(repository_ctx): + arrow_interface_library_path = _get_pyarrow_shared_library(repository_ctx, "arrow.lib", python_bin) + arrow_interface_library = 
arrow_interface_library_path.rsplit("/",1 )[-1] + arrow_interface_library_rule = _symlink_genrule_for_dir( + repository_ctx, None, "", "libarrow_interface", [arrow_interface_library_path], [arrow_interface_library]) + + arrow_python_interface_library_path = _get_pyarrow_shared_library(repository_ctx, "arrow_python.lib", python_bin) + arrow_python_interface_library = arrow_python_interface_library_path.rsplit("/",1 )[-1] + arrow_python_interface_library_rule = _symlink_genrule_for_dir( + repository_ctx, None, "", "libarrow_python_interface", + [arrow_python_interface_library_path], [arrow_python_interface_library]) + + build_tpl = repository_ctx.path(Label("//bazel/arrow:BUILD.windows.bzl")) + repository_ctx.template("BUILD", build_tpl, { + "%{ARROW_HEADER_GENRULE}": arrow_header_rule, + "%{ARROW_LIBRARY_GENRULE}": arrow_library_rule, + "%{ARROW_ITF_LIBRARY_GENRULE}": arrow_interface_library_rule, + "%{ARROW_PYTHON_LIBRARY_GENRULE}": arrow_python_library_rule, + "%{ARROW_PYTHON_ITF_LIB_GENRULE}": arrow_python_interface_library_rule, + "%{PYTHON_NUMPY_INCLUDE_GENRULE}": python_numpy_include_rule, + }) + else: + build_tpl = repository_ctx.path(Label("//bazel/arrow:BUILD.tpl.bzl")) + repository_ctx.template("BUILD", build_tpl, { + "%{ARROW_HEADER_GENRULE}": arrow_header_rule, + "%{ARROW_LIBRARY_GENRULE}": arrow_library_rule, + "%{ARROW_PYTHON_LIBRARY_GENRULE}": arrow_python_library_rule, + "%{PYTHON_NUMPY_INCLUDE_GENRULE}": python_numpy_include_rule, + }) pyarrow_configure = repository_rule( implementation = _pyarrow_pip_impl, diff --git a/ci/run_ci.py b/ci/run_ci.py index d43a4af53d..30141ab63d 100644 --- a/ci/run_ci.py +++ b/ci/run_ci.py @@ -28,6 +28,8 @@ PYARROW_VERSION = "14.0.0" +NUMPY_VERSION = "1.26.4" + PROJECT_ROOT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../") logging.basicConfig( @@ -115,7 +117,7 @@ def _run_js(): def _install_cpp_deps(): _exec_cmd(f"pip install pyarrow=={PYARROW_VERSION}") _exec_cmd("pip install psutil") - 
_exec_cmd("pip install 'numpy<2.0.0'") + _exec_cmd(f"pip install numpy=={NUMPY_VERSION}") _install_bazel() diff --git a/cpp/fury/row/row.cc b/cpp/fury/row/row.cc index 79fa6594d3..e30cafa6bd 100644 --- a/cpp/fury/row/row.cc +++ b/cpp/fury/row/row.cc @@ -237,8 +237,8 @@ int *ArrayData::GetDimensions(ArrayData &array, int num_dims) { // use deep-first search to search to numDimensions-1 layer to get dimensions. int depth = 0; auto dimensions = new int[num_dims]; - int start_from_lefts[num_dims]; - ArrayData *arrs[num_dims]; // root to current node + std::vector<int> start_from_lefts(num_dims); + std::vector<ArrayData *> arrs(num_dims); // root to current node ArrayData &arr = array; while (depth < num_dims) { arrs[depth] = &arr;