Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,7 @@ endif(UNIX)
set(ARROW_LINK_LIBS)

# Libraries to link statically with libarrow.so
set(ARROW_STATIC_LINK_LIBS)
set(ARROW_STATIC_LINK_LIBS double-conversion)

if (ARROW_WITH_BROTLI)
SET(ARROW_STATIC_LINK_LIBS
Expand Down Expand Up @@ -694,6 +694,7 @@ else ()
set(ARROW_MIN_TEST_LIBS
arrow_shared
${ARROW_LINK_LIBS}
double-conversion
${BOOST_SYSTEM_LIBRARY}
${BOOST_FILESYSTEM_LIBRARY}
${BOOST_REGEX_LIBRARY}
Expand Down
70 changes: 58 additions & 12 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ if (NOT "$ENV{ARROW_BUILD_TOOLCHAIN}" STREQUAL "")
set(BROTLI_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
set(FLATBUFFERS_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
set(GFLAGS_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
set(GLOG_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
set(GRPC_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
# Using gtest from the toolchain breaks AppVeyor builds
if (NOT MSVC)
set(GTEST_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
endif()
set(JEMALLOC_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
set(GRPC_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
set(LZ4_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
# orc disabled as it's not in conda-forge (but in Anaconda with an incompatible ABI)
# set(ORC_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
Expand All @@ -41,7 +41,6 @@ if (NOT "$ENV{ARROW_BUILD_TOOLCHAIN}" STREQUAL "")
set(THRIFT_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
set(ZLIB_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
set(ZSTD_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")
set(GLOG_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}")

if (NOT DEFINED ENV{BOOST_ROOT})
# Since we have to set this in the environment, we check whether
Expand All @@ -56,6 +55,10 @@ if (DEFINED ENV{BROTLI_HOME})
set(BROTLI_HOME "$ENV{BROTLI_HOME}")
endif()

if (DEFINED ENV{DOUBLE_CONVERSION_HOME})
set(DOUBLE_CONVERSION_HOME "$ENV{DOUBLE_CONVERSION_HOME}")
endif()

if (DEFINED ENV{FLATBUFFERS_HOME})
set(FLATBUFFERS_HOME "$ENV{FLATBUFFERS_HOME}")
endif()
Expand All @@ -64,6 +67,10 @@ if (DEFINED ENV{GFLAGS_HOME})
set(GFLAGS_HOME "$ENV{GFLAGS_HOME}")
endif()

if (DEFINED ENV{GLOG_HOME})
set(GLOG_HOME "$ENV{GLOG_HOME}")
endif()

if (DEFINED ENV{GRPC_HOME})
set(GRPC_HOME "$ENV{GRPC_HOME}")
endif()
Expand Down Expand Up @@ -108,10 +115,6 @@ if (DEFINED ENV{ZSTD_HOME})
set(ZSTD_HOME "$ENV{ZSTD_HOME}")
endif()

if (DEFINED ENV{GLOG_HOME})
set(GLOG_HOME "$ENV{GLOG_HOME}")
endif()

# ----------------------------------------------------------------------
# Some EP's require other EP's

Expand Down Expand Up @@ -165,6 +168,12 @@ else()
set(BROTLI_SOURCE_URL "https://github.com/google/brotli/archive/${BROTLI_VERSION}.tar.gz")
endif()

if (DEFINED ENV{DOUBLE_CONVERSION_SOURCE_URL})
set(DOUBLE_CONVERSION_SOURCE_URL "$ENV{DOUBLE_CONVERSION_SOURCE_URL}")
else()
set(DOUBLE_CONVERSION_SOURCE_URL "https://github.com/google/double-conversion/archive/${DOUBLE_CONVERSION_VERSION}.tar.gz")
endif()

if (DEFINED ENV{ARROW_FLATBUFFERS_URL})
set(FLATBUFFERS_SOURCE_URL "$ENV{ARROW_FLATBUFFERS_URL}")
else()
Expand All @@ -183,6 +192,12 @@ else()
set(GFLAGS_SOURCE_URL "https://github.com/gflags/gflags/archive/${GFLAGS_VERSION}.tar.gz")
endif()

if (DEFINED ENV{ARROW_GLOG_URL})
set(GLOG_SOURCE_URL "$ENV{ARROW_GLOG_URL}")
else()
set(GLOG_SOURCE_URL "https://github.com/google/glog/archive/${GLOG_VERSION}.tar.gz")
endif()

if (DEFINED ENV{ARROW_GRPC_URL})
set(GRPC_SOURCE_URL "$ENV{ARROW_GRPC_URL}")
else()
Expand Down Expand Up @@ -245,12 +260,6 @@ else()
set(ZSTD_SOURCE_URL "https://github.com/facebook/zstd/archive/${ZSTD_VERSION}.tar.gz")
endif()

if (DEFINED ENV{ARROW_GLOG_URL})
set(GLOG_SOURCE_URL "$ENV{ARROW_GLOG_URL}")
else()
set(GLOG_SOURCE_URL "https://github.com/google/glog/archive/${GLOG_VERSION}.tar.gz")
endif()

# ----------------------------------------------------------------------
# ExternalProject options

Expand Down Expand Up @@ -436,6 +445,43 @@ endif()

include_directories(SYSTEM ${Boost_INCLUDE_DIR})

# ----------------------------------------------------------------------
# Google double-conversion

if("${DOUBLE_CONVERSION_HOME}" STREQUAL "")
set(DOUBLE_CONVERSION_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/double-conversion_ep/src/double-conversion_ep")
set(DOUBLE_CONVERSION_HOME "${DOUBLE_CONVERSION_PREFIX}")
set(DOUBLE_CONVERSION_INCLUDE_DIR "${DOUBLE_CONVERSION_PREFIX}/include")
set(DOUBLE_CONVERSION_STATIC_LIB "${DOUBLE_CONVERSION_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}double-conversion${CMAKE_STATIC_LIBRARY_SUFFIX}")

set(DOUBLE_CONVERSION_CMAKE_ARGS
"-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
"-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}"
"-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_CXX_FLAGS}"
"-DCMAKE_INSTALL_PREFIX=${DOUBLE_CONVERSION_PREFIX}")
ExternalProject_Add(double-conversion_ep
${EP_LOG_OPTIONS}
INSTALL_DIR ${DOUBLE_CONVERSION_PREFIX}
URL ${DOUBLE_CONVERSION_SOURCE_URL}
CMAKE_ARGS ${DOUBLE_CONVERSION_CMAKE_ARGS}
BUILD_BYPRODUCTS "${DOUBLE_CONVERSION_STATIC_LIB}")
set(DOUBLE_CONVERSION_VENDORED 1)
else()
find_package(double-conversion REQUIRED)
set(DOUBLE_CONVERSION_VENDORED 0)
endif()

include_directories(SYSTEM ${DOUBLE_CONVERSION_INCLUDE_DIR})
ADD_THIRDPARTY_LIB(double-conversion
STATIC_LIB ${DOUBLE_CONVERSION_STATIC_LIB})

if (DOUBLE_CONVERSION_VENDORED)
add_dependencies(arrow_dependencies double-conversion_ep)
endif()

# ----------------------------------------------------------------------
# Google gtest & gflags

if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS)
add_custom_target(unittest ctest -L unittest)

Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/symbols.map
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@

# Statically linked C++ dependencies
boost::*;
double_conversion::*;
google::*;
# glog
*MakeCheckOpValueString*;
Expand Down
35 changes: 32 additions & 3 deletions cpp/src/arrow/util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,40 @@
# arrow_util
#######################################

file(GLOB_RECURSE ARROW_UTIL_HEADERS "*.h")

# Headers: top level
install(FILES
${ARROW_UTIL_HEADERS}
bit-stream-utils.h
bit-util.h
bpacking.h
checked_cast.h
compiler-util.h
compression.h
compression_brotli.h
compression_lz4.h
compression_snappy.h
compression_zlib.h
compression_zstd.h
cpu-info.h
decimal.h
hash-util.h
hash.h
io-util.h
key_value_metadata.h
lazy.h
logging.h
macros.h
memory.h
parallel.h
rle-encoding.h
sse-util.h
stl.h
stopwatch.h
string.h
thread-pool.h
type_traits.h
variant.h
visibility.h
windows_compatibility.h
DESTINATION include/arrow/util)

#######################################
Expand Down
51 changes: 43 additions & 8 deletions cpp/src/arrow/util/parsing.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.

// This is a private header for string-to-number parsing utilitiers

#ifndef ARROW_UTIL_PARSING_H
#define ARROW_UTIL_PARSING_H

Expand All @@ -24,6 +26,8 @@
#include <string>
#include <type_traits>

#include <double-conversion/double-conversion.h>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

arrow/util/parsing.h is being installed, since you are including a thirdparty header, we should stop installing the header

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good point.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, the header should not be installed anymore.


#include "arrow/type.h"
#include "arrow/type_traits.h"

Expand Down Expand Up @@ -79,26 +83,57 @@ class StringConverter<BooleanType> {

// Ideas for faster float parsing:
// - http://rapidjson.org/md_doc_internals.html#ParsingDouble
// - https://github.com/google/double-conversion
// - https://github.com/google/double-conversion [used here]
// - https://github.com/achan001/dtoa-fast

template <class ARROW_TYPE>
class StringToFloatConverterMixin {
public:
using value_type = typename ARROW_TYPE::c_type;

StringToFloatConverterMixin() { ibuf.imbue(std::locale::classic()); }
StringToFloatConverterMixin()
: main_converter_(flags_, main_junk_value_, main_junk_value_, "inf", "nan"),
fallback_converter_(flags_, fallback_junk_value_, fallback_junk_value_, "inf",
"nan") {}

bool operator()(const char* s, size_t length, value_type* out) {
ibuf.clear();
ibuf.str(std::string(s, length));
ibuf >> *out;
// XXX Should we reset errno on failure?
return !ibuf.fail() && ibuf.eof();
value_type v;
// double-conversion doesn't give us an error flag but signals parse
// errors with sentinel values. Since a sentinel value can appear as
// legitimate input, we fallback on a second converter with a different
// sentinel to eliminate false errors.
TryConvert(main_converter_, s, length, &v);
if (ARROW_PREDICT_FALSE(v == static_cast<value_type>(main_junk_value_))) {
TryConvert(fallback_converter_, s, length, &v);
if (ARROW_PREDICT_FALSE(v == static_cast<value_type>(fallback_junk_value_))) {
return false;
}
}
*out = v;
return true;
}

protected:
std::istringstream ibuf;
static const int flags_ =
double_conversion::StringToDoubleConverter::ALLOW_CASE_INSENSIBILITY;
// Two unlikely values to signal a parsing error
static constexpr double main_junk_value_ = 0.7066424364107089;
static constexpr double fallback_junk_value_ = 0.40088499148279166;

double_conversion::StringToDoubleConverter main_converter_;
double_conversion::StringToDoubleConverter fallback_converter_;

inline void TryConvert(double_conversion::StringToDoubleConverter& converter,
const char* s, size_t length, float* out) {
int processed_length;
*out = converter.StringToFloat(s, static_cast<int>(length), &processed_length);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder how double-conversion handle negative ints when length > 2^31.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Realistically, it probably doesn't and expects positive input.

}

inline void TryConvert(double_conversion::StringToDoubleConverter& converter,
const char* s, size_t length, double* out) {
int processed_length;
*out = converter.StringToDouble(s, static_cast<int>(length), &processed_length);
}
};

template <>
Expand Down
23 changes: 12 additions & 11 deletions cpp/thirdparty/versions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,20 @@
# Toolchain library versions

BOOST_VERSION=1.67.0
GTEST_VERSION=1.8.0
GFLAGS_VERSION=v2.2.0
GBENCHMARK_VERSION=v1.4.1
BROTLI_VERSION=v0.6.0
DOUBLE_CONVERSION_VERSION=v3.1.1
FLATBUFFERS_VERSION=02a7807dd8d26f5668ffbbec0360dc107bbfabd5
RAPIDJSON_VERSION=v1.1.0
GBENCHMARK_VERSION=v1.4.1
GFLAGS_VERSION=v2.2.0
GLOG_VERSION=v0.3.5
GRPC_VERSION=1.14.1
GTEST_VERSION=1.8.0
JEMALLOC_VERSION=17c897976c60b0e6e4f4a365c751027244dada7a
SNAPPY_VERSION=1.1.3
BROTLI_VERSION=v0.6.0
LZ4_VERSION=v1.7.5
ZLIB_VERSION=1.2.8
ZSTD_VERSION=v1.2.0
PROTOBUF_VERSION=v3.6.1
GRPC_VERSION=1.14.1
ORC_VERSION=1.5.1
PROTOBUF_VERSION=v3.6.1
RAPIDJSON_VERSION=v1.1.0
SNAPPY_VERSION=1.1.3
THRIFT_VERSION=0.11.0
GLOG_VERSION=v0.3.5
ZLIB_VERSION=1.2.8
ZSTD_VERSION=v1.2.0