Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

arrow: fix recipe when arrow/*:parquet=True #24044

Merged
merged 21 commits into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 67 additions & 6 deletions recipes/arrow/all/conandata.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,24 +48,85 @@ sources:
url: "https://www.apache.org/dyn/closer.lua/arrow/arrow-7.0.0/apache-arrow-7.0.0.tar.gz?action=download"
sha256: "e8f49b149a15ecef4e40fcfab1b87c113c6b1ee186005c169e5cdf95d31a99de"
patches:
"17.0.0":
- patch_file: "patches/16.0.0-0001-fix-cmake.patch"
patch_description: "use cci package"
patch_type: "conan"
"16.1.0":
- patch_file: "patches/16.0.0-0001-fix-cmake.patch"
patch_description: "use cci package"
patch_type: "conan"
"16.0.0":
- patch_file: "patches/16.0.0-0001-fix-cmake.patch"
patch_description: "use cci package"
patch_type: "conan"
"15.0.0":
- patch_file: "patches/11.0.0-0001-fix-cmake.patch"
patch_description: "use cci package"
patch_type: "conan"
"14.0.2":
- patch_file: "patches/11.0.0-0001-fix-cmake.patch"
patch_description: "use cci package"
patch_type: "conan"
"14.0.1":
- patch_file: "patches/11.0.0-0001-fix-cmake.patch"
patch_description: "use cci package"
patch_type: "conan"
"14.0.0":
- patch_file: "patches/11.0.0-0001-fix-cmake.patch"
patch_description: "use cci package"
patch_type: "conan"
"13.0.0":
- patch_file: "patches/11.0.0-0001-fix-cmake.patch"
patch_description: "use cci package"
patch_type: "conan"
"12.0.1":
- patch_file: "patches/11.0.0-0001-fix-cmake.patch"
patch_description: "use cci package"
patch_type: "conan"
"12.0.0":
- patch_file: "patches/11.0.0-0001-fix-cmake.patch"
patch_description: "use cci package"
patch_type: "conan"
"11.0.0":
- patch_file: "patches/11.0.0-0001-fix-cmake.patch"
patch_description: "use cci package"
patch_type: "conan"
"8.0.1":
- patch_file: "patches/8.0.0-0005-install-utils.patch"
- patch_file: "patches/8.0.0-0001-static-analyzers.patch"
patch_description: "do not look for static analyzers"
patch_type: "conan"
- patch_file: "patches/8.0.0-0002-install-utils.patch"
patch_description: "enable utils installation"
patch_type: "conan"
- patch_file: "patches/8.0.0-0006-fix-cmake.patch"
- patch_file: "patches/8.0.0-0003-fix-cmake.patch"
patch_description: "use cci package"
patch_type: "conan"
- patch_file: "patches/8.0.0-0004-fix-msvc-build.patch"
patch_description: "patch Arrow such that it can be built using MSVC"
patch_type: "backport"
patch_source: "https://github.com/apache/arrow/pull/13108"
"8.0.0":
- patch_file: "patches/8.0.0-0005-install-utils.patch"
- patch_file: "patches/8.0.0-0001-static-analyzers.patch"
patch_description: "do not look for static analyzers"
patch_type: "conan"
- patch_file: "patches/8.0.0-0002-install-utils.patch"
patch_description: "enable utils installation"
patch_type: "conan"
- patch_file: "patches/8.0.0-0006-fix-cmake.patch"
- patch_file: "patches/8.0.0-0003-fix-cmake.patch"
patch_description: "use cci package"
patch_type: "conan"
- patch_file: "patches/8.0.0-0004-fix-msvc-build.patch"
patch_description: "patch Arrow such that it can be built using MSVC"
patch_type: "backport"
patch_source: "https://github.com/apache/arrow/pull/13108"
"7.0.0":
- patch_file: "patches/7.0.0-0006-install-utils.patch"
- patch_file: "patches/7.0.0-0001-static-analyzers.patch"
patch_description: "do not look for static analyzers"
patch_type: "conan"
- patch_file: "patches/7.0.0-0002-install-utils.patch"
patch_description: "enable utils installation"
patch_type: "conan"
- patch_file: "patches/7.0.0-0007-fix-cmake.patch"
- patch_file: "patches/7.0.0-0003-fix-cmake.patch"
patch_description: "use cci package"
patch_type: "conan"
43 changes: 27 additions & 16 deletions recipes/arrow/all/conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from conan.errors import ConanInvalidConfiguration, ConanException
from conan.tools.build import check_min_cppstd, cross_building
from conan.tools.cmake import CMake, CMakeDeps, CMakeToolchain, cmake_layout
from conan.tools.files import apply_conandata_patches, copy, export_conandata_patches, get, rmdir, save
from conan.tools.files import apply_conandata_patches, copy, export_conandata_patches, get, rmdir, save, replace_in_file
from conan.tools.microsoft import is_msvc, is_msvc_static_runtime
from conan.tools.scm import Version

Expand Down Expand Up @@ -72,7 +72,7 @@ class ArrowConan(ConanFile):
"shared": False,
"fPIC": True,
"gandiva": False,
"parquet": False,
"parquet": True,
"skyhook": False,
"substrait": False,
"acero": False,
Expand All @@ -87,7 +87,7 @@ class ArrowConan(ConanFile):
"simd_level": "default",
"runtime_simd_level": "max",
"with_backtrace": False,
"with_boost": False,
"with_boost": True,
"with_brotli": False,
"with_bz2": False,
"with_csv": False,
Expand All @@ -101,7 +101,7 @@ class ArrowConan(ConanFile):
"with_glog": False,
"with_grpc": False,
"with_json": False,
"with_thrift": False,
"with_thrift": True,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a good reason to have this set to true? Is this just for testing the changes n the CI? We could then revert it now

Copy link
Contributor Author

@robomics robomics Oct 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes! with_thrift needs to be True in order to build Arrow with Parquet support (which is now enabled by default based on #24044 (comment)).

"with_llvm": False,
"with_openssl": False,
"with_opentelemetry": False,
Expand All @@ -112,7 +112,7 @@ class ArrowConan(ConanFile):
"with_utf8proc": False,
"with_lz4": False,
"with_snappy": False,
"with_zlib": False,
"with_zlib": True,
"with_zstd": False,
}
short_paths = True
Expand Down Expand Up @@ -162,7 +162,7 @@ def _requires_rapidjson(self):

def requirements(self):
if self.options.with_thrift:
self.requires("thrift/0.17.0")
self.requires("thrift/0.20.0")
if self.options.with_protobuf:
self.requires("protobuf/3.21.12")
if self.options.with_jemalloc:
Expand Down Expand Up @@ -202,12 +202,15 @@ def requirements(self):
if self.options.with_snappy:
self.requires("snappy/1.1.9")
if self.options.get_safe("simd_level") != None or \
self.options.get_safe("runtime_simd_level") != None:
self.requires("xsimd/9.0.1")
self.options.get_safe("runtime_simd_level") != None:
if Version(self.version) < 8:
self.requires("xsimd/9.0.1")
else:
self.requires("xsimd/13.0.0")
if self.options.with_zlib:
self.requires("zlib/[>=1.2.11 <2]")
if self.options.with_zstd:
self.requires("zstd/1.5.5")
self.requires("zstd/[>=1.5 <1.6]")
if self.options.with_re2:
self.requires("re2/20230301")
if self.options.with_utf8proc:
Expand All @@ -228,18 +231,18 @@ def validate(self):
# From https://github.com/conan-io/conan-center-index/pull/23163#issuecomment-2039808851
if self.options.gandiva:
if not self.options.with_re2:
raise ConanException("'with_re2' option should be True when'gandiva=True'")
raise ConanException("'with_re2' option should be True when 'gandiva=True'")
if not self.options.with_boost:
raise ConanException("'with_boost' option should be True when'gandiva=True'")
raise ConanException("'with_boost' option should be True when 'gandiva=True'")
if not self.options.with_utf8proc:
raise ConanException("'with_utf8proc' option should be True when'gandiva=True'")
raise ConanException("'with_utf8proc' option should be True when 'gandiva=True'")
if self.options.with_thrift and not self.options.with_boost:
raise ConanException("'with_boost' option should be True when 'thrift=True'")
if self.options.parquet:
if not self.options.with_boost:
raise ConanException("'with_boost' option should be True when'parquet=True'")
if not self.options.with_thrift:
raise ConanException("'with_thrift' option should be True when'parquet=True'")
raise ConanException("'with_thrift' option should be True when 'parquet=True'")
if self.options.with_flight_rpc and not self.options.with_protobuf:
raise ConanException("'with_protobuf' option should be True when'with_flight_rpc=True'")
raise ConanException("'with_protobuf' option should be True when 'with_flight_rpc=True'")

if self.settings.compiler.get_safe("cppstd"):
check_min_cppstd(self, self._min_cppstd)
Expand All @@ -261,6 +264,11 @@ def validate(self):
if self.dependencies["jemalloc"].options.enable_cxx:
raise ConanInvalidConfiguration("jemmalloc.enable_cxx of a static jemalloc must be disabled")

if self.options.with_thrift and not self.options.with_zlib:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are more checks in ThirdpartyToolchain.cmake that might be worth to reflect in the recipe, instead of relying on autodiscovery

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That file is over 5000 lines long :D. I do not have the bandwidth to read through it to see what's going on.
If you have some specific checks in mind?

Otherwise I suggest we leave this to be addressed in another PR.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the only one that stood out to me, so I'm happy with keeping that on a follow-up PR, yes :)

raise ConanInvalidConfiguration("arrow:with_thrift requires arrow:with_zlib")

if self.options.parquet and not self.options.with_thrift:
raise ConanInvalidConfiguration("arrow:parquet requires arrow:with_thrift")

def build_requirements(self):
if Version(self.version) >= "13.0.0":
Expand Down Expand Up @@ -318,6 +326,7 @@ def generate(self):
tc.variables["GLOG_SOURCE"] = "SYSTEM"
tc.variables["ARROW_WITH_BACKTRACE"] = bool(self.options.with_backtrace)
tc.variables["ARROW_WITH_BROTLI"] = bool(self.options.with_brotli)
tc.variables["ARROW_WITH_RE2"] = bool(self.options.with_re2)
tc.variables["brotli_SOURCE"] = "SYSTEM"
if self.options.with_brotli:
tc.variables["ARROW_BROTLI_USE_SHARED"] = bool(self.dependencies["brotli"].options.shared)
Expand Down Expand Up @@ -349,8 +358,10 @@ def generate(self):
tc.variables["ARROW_ZSTD_USE_SHARED"] = bool(self.dependencies["zstd"].options.shared)
tc.variables["ORC_SOURCE"] = "SYSTEM"
tc.variables["ARROW_WITH_THRIFT"] = bool(self.options.with_thrift)
tc.variables["ARROW_THRIFT"] = bool(self.options.with_thrift)
tc.variables["Thrift_SOURCE"] = "SYSTEM"
if self.options.with_thrift:
tc.variables["ARROW_THRIFT"] = True
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I can see, this variable was not set, but maybe we would need to set ARROW_WITH_THRIFT instead? Am I missing something? Do you have any insight? Thanks!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not super familiar with Arrow's code base, so I am not 100% sure on what's the difference between ARROW_WITH_THRIFT and ARROW_THRIFT.

A quick search on Arrow's codebase suggests that ARROW_THRIFT is only referenced in

$ find arrow -type f -exec grep -P "\bARROW_THRIFT\b" {} +

arrow/cpp/cmake_modules/ThirdpartyToolchain.cmake:if(ARROW_THRIFT)
arrow/python/build/lib.linux-x86_64-cpython-312/cmake_modules/ThirdpartyToolchain.cmake:if(ARROW_THRIFT)

Where it seems to be used only to turn on ZLIB support - link

if(ARROW_THRIFT)
  set(ARROW_WITH_ZLIB ON)
endif()

So I think setting both ARROW_WITH_THRIFT and ARROW_THRIFT is ok (will push a commit to do so ASAP).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

tc.variables["THRIFT_VERSION"] = bool(self.dependencies["thrift"].ref.version) # a recent thrift does not require boost
tc.variables["ARROW_THRIFT_USE_SHARED"] = bool(self.dependencies["thrift"].options.shared)
tc.variables["ARROW_USE_OPENSSL"] = self.options.with_openssl
Expand Down
42 changes: 42 additions & 0 deletions recipes/arrow/all/patches/11.0.0-0001-fix-cmake.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
diff --git a/cpp/cmake_modules/FindThriftAlt.cmake b/cpp/cmake_modules/FindThriftAlt.cmake
index f3e49021d..95177c2a6 100644
--- a/cpp/cmake_modules/FindThriftAlt.cmake
+++ b/cpp/cmake_modules/FindThriftAlt.cmake
@@ -45,22 +45,21 @@ endif()
# * https://github.com/apache/thrift/pull/2725
perseoGI marked this conversation as resolved.
Show resolved Hide resolved
# * https://github.com/apache/thrift/pull/2726
# * https://github.com/conda-forge/thrift-cpp-feedstock/issues/68
-if(NOT WIN32)
- set(find_package_args "")
- if(ThriftAlt_FIND_VERSION)
- list(APPEND find_package_args ${ThriftAlt_FIND_VERSION})
- endif()
- if(ThriftAlt_FIND_QUIETLY)
- list(APPEND find_package_args QUIET)
- endif()
- find_package(Thrift ${find_package_args})
- if(Thrift_FOUND)
- set(ThriftAlt_FOUND TRUE)
- add_executable(thrift::compiler IMPORTED)
- set_target_properties(thrift::compiler PROPERTIES IMPORTED_LOCATION
- "${THRIFT_COMPILER}")
- return()
- endif()
+
+set(find_package_args "")
+if(ThriftAlt_FIND_VERSION)
+ list(APPEND find_package_args ${ThriftAlt_FIND_VERSION})
+endif()
+if(ThriftAlt_FIND_QUIETLY)
+ list(APPEND find_package_args QUIET)
+endif()
+find_package(Thrift ${find_package_args})
+if(Thrift_FOUND)
+ set(ThriftAlt_FOUND TRUE)
+ add_executable(thrift::compiler IMPORTED)
+ set_target_properties(thrift::compiler PROPERTIES IMPORTED_LOCATION
+ "${THRIFT_COMPILER}")
+ return()
endif()

function(extract_thrift_version)
62 changes: 62 additions & 0 deletions recipes/arrow/all/patches/16.0.0-0001-fix-cmake.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
diff --git a/cpp/cmake_modules/FindThriftAlt.cmake b/cpp/cmake_modules/FindThriftAlt.cmake
index f3e49021d..3e63f1edf 100644
--- a/cpp/cmake_modules/FindThriftAlt.cmake
+++ b/cpp/cmake_modules/FindThriftAlt.cmake
@@ -45,23 +45,23 @@ endif()
# * https://github.com/apache/thrift/pull/2725
# * https://github.com/apache/thrift/pull/2726
# * https://github.com/conda-forge/thrift-cpp-feedstock/issues/68
-if(NOT WIN32)
- set(find_package_args "")
- if(ThriftAlt_FIND_VERSION)
- list(APPEND find_package_args ${ThriftAlt_FIND_VERSION})
- endif()
- if(ThriftAlt_FIND_QUIETLY)
- list(APPEND find_package_args QUIET)
- endif()
- find_package(Thrift ${find_package_args})
- if(Thrift_FOUND)
- set(ThriftAlt_FOUND TRUE)
- add_executable(thrift::compiler IMPORTED)
- set_target_properties(thrift::compiler PROPERTIES IMPORTED_LOCATION
- "${THRIFT_COMPILER}")
- return()
- endif()
+
+set(find_package_args "")
+if(ThriftAlt_FIND_VERSION)
+ list(APPEND find_package_args ${ThriftAlt_FIND_VERSION})
+endif()
+if(ThriftAlt_FIND_QUIETLY)
+ list(APPEND find_package_args QUIET)
endif()
+find_package(Thrift ${find_package_args})
+if(Thrift_FOUND)
+ set(ThriftAlt_FOUND TRUE)
+ add_executable(thrift::compiler IMPORTED)
+ set_target_properties(thrift::compiler PROPERTIES IMPORTED_LOCATION
+ "${THRIFT_COMPILER}")
+ return()
+endif()
+

function(extract_thrift_version)
if(ThriftAlt_INCLUDE_DIR)
diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt
index 93f2e72d8..e00f73f7d 100644
--- a/cpp/src/parquet/CMakeLists.txt
+++ b/cpp/src/parquet/CMakeLists.txt
@@ -262,11 +262,11 @@ if(NOT PARQUET_MINIMAL_DEPENDENCY)

# These are libraries that we will link privately with parquet_shared (as they
# do not need to be linked transitively by other linkers)
- list(APPEND PARQUET_SHARED_PRIVATE_LINK_LIBS thrift::thrift)
+ list(APPEND PARQUET_SHARED_PRIVATE_LINK_LIBS Boost::headers thrift::thrift)
robomics marked this conversation as resolved.
Show resolved Hide resolved

# Link publicly with parquet_static (because internal users need to
# transitively link all dependencies)
- list(APPEND PARQUET_STATIC_LINK_LIBS thrift::thrift)
+ list(APPEND PARQUET_STATIC_LINK_LIBS Boost::headers thrift::thrift)
if(NOT THRIFT_VENDORED)
list(APPEND PARQUET_STATIC_INSTALL_INTERFACE_LIBS thrift::thrift)
endif()
17 changes: 17 additions & 0 deletions recipes/arrow/all/patches/7.0.0-0001-static-analyzers.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index dff5b1a59..1189a2957 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -161,11 +161,7 @@ else()
set(MSVC_TOOLCHAIN FALSE)
endif()

-find_package(ClangTools)
-find_package(InferTools)
-if("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1"
- OR CLANG_TIDY_FOUND
- OR INFER_FOUND)
+if("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1")
# Generate a Clang compile_commands.json "compilation database" file for use
# with various development tools, such as Vim's YouCompleteMe plugin.
# See http://clang.llvm.org/docs/JSONCompilationDatabase.html
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt
index 495018e..f6cee6f 100644
index 495018ec0..f6cee6fde 100644
--- a/cpp/src/arrow/ipc/CMakeLists.txt
+++ b/cpp/src/arrow/ipc/CMakeLists.txt
@@ -61,8 +61,12 @@ endif()
Expand Down
Loading