From fcf972e23f422f5a48f2c99104a39dd25feac9e5 Mon Sep 17 00:00:00 2001 From: Yuqi Gu Date: Thu, 22 Nov 2018 06:35:38 +0000 Subject: [PATCH 1/3] ARROW-3849 Leverage Armv8 crc32 extension instructions to accelerate the hash computation for Arm64 The Hash utility leverages SSE4 to accelerate the Crc32 data hash computation for x86. Correspondingly, we will leverage the Arm crc32 extension instructions to accelerate the hash computation for AArch64. Change-Id: I7da36f8da8d1c32f10eef33a664e5e230f214c59 Signed-off-by: Yuqi Gu --- cpp/src/arrow/builder.cc | 347 ++++++++--- cpp/src/arrow/compute/kernels/hash.cc | 806 +++++++++++++++++++++----- cpp/src/arrow/util/CMakeLists.txt | 4 + 3 files changed, 936 insertions(+), 221 deletions(-) diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index 6aa415bbed2f3..b7a0fb5513a4e 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -33,9 +33,18 @@ #include "arrow/util/bit-util.h" #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" -#include "arrow/util/hashing.h" +#include "arrow/util/hash-util.h" +#include "arrow/util/hash.h" #include "arrow/util/logging.h" +#if defined(__GNUC__) && defined(__linux__) && defined(ARROW_USE_ARMCE) +#define HASH_MODE USE_ARMCRC +#elif defined(ARROW_USE_SSE) +#define HASH_MODE USE_SSE42 +#else +#define HASH_MODE USE_DEFAULT +#endif + namespace arrow { using internal::AdaptiveIntBuilderBase; @@ -729,7 +738,7 @@ Status BooleanBuilder::AppendValues(const std::vector& values, int64_t i = 0; internal::GenerateBitsUnrolled(raw_data_, length_, length, - [&values, &i]() -> bool { return values[i++]; }); + [values, &i]() -> bool { return values[i++]; }); // this updates length_ ArrayBuilder::UnsafeAppendToBitmap(is_valid); @@ -742,7 +751,7 @@ Status BooleanBuilder::AppendValues(const std::vector& values) { int64_t i = 0; internal::GenerateBitsUnrolled(raw_data_, length_, length, - [&values, &i]() -> bool { return values[i++]; }); + [values, &i]() -> bool { return values[i++]; }); // this updates length_ ArrayBuilder::UnsafeSetNotNull(length); @@ -752,42 +761,152 @@ Status BooleanBuilder::AppendValues(const std::vector& values) { // ---------------------------------------------------------------------- // DictionaryBuilder +using internal::DictionaryScalar; +using internal::WrappedBinary; + +namespace { + +// A helper class to manage a hash table embedded in a typed Builder. +template +struct DictionaryHashHelper {}; + +// DictionaryHashHelper implementation for primitive types template -class DictionaryBuilder::MemoTableImpl - : public internal::HashTraits::MemoTableType { - public: - using MemoTableType = typename internal::HashTraits::MemoTableType; - using MemoTableType::MemoTableType; +struct DictionaryHashHelper> { + using Builder = typename TypeTraits::BuilderType; + using Scalar = typename DictionaryScalar::type; + + // Get the dictionary value at the given builder index + static Scalar GetDictionaryValue(const Builder& builder, int64_t index) { + return builder.GetValue(index); + } + + // Compute the hash of a scalar value + static int64_t HashValue(const Scalar& value, int byte_width) { + return HashUtil::Hash(&value, sizeof(Scalar), 0); + } + + // Return whether the dictionary value at the given builder index is unequal to value + static bool SlotDifferent(const Builder& builder, int64_t index, const Scalar& value) { + return GetDictionaryValue(builder, index) != value; + } + + // Append a value to the builder + static Status AppendValue(Builder& builder, const Scalar& value) { + return builder.Append(value); + } + + // Append another builder's contents to the builder + static Status AppendArray(Builder& builder, const Array& in_array) { + const auto& array = checked_cast(in_array); + return builder.AppendValues(reinterpret_cast(array.values()->data()), + array.length(), nullptr); + } }; +// DictionaryHashHelper implementation for StringType / BinaryType template -DictionaryBuilder::~DictionaryBuilder() {} +struct DictionaryHashHelper> { + using Builder = typename TypeTraits::BuilderType; + using Scalar = typename DictionaryScalar::type; + + static Scalar GetDictionaryValue(const Builder& builder, int64_t index) { + int32_t v_length; + const uint8_t* v_ptr = builder.GetValue(index, &v_length); + return WrappedBinary(v_ptr, v_length); + } + + static int64_t HashValue(const Scalar& value, int byte_width) { + return HashUtil::Hash(value.ptr_, value.length_, 0); + } + + static bool SlotDifferent(const Builder& builder, int64_t index, const Scalar& value) { + int32_t other_length; + const uint8_t* other_ptr = builder.GetValue(index, &other_length); + return value.length_ != other_length || + memcmp(value.ptr_, other_ptr, other_length) != 0; + } + + static Status AppendValue(Builder& builder, const Scalar& value) { + return builder.Append(value.ptr_, value.length_); + } + + static Status AppendArray(Builder& builder, const Array& in_array) { + const auto& array = checked_cast(in_array); + for (uint64_t index = 0, limit = array.length(); index < limit; ++index) { + int32_t length; + const uint8_t* ptr = array.GetValue(index, &length); + RETURN_NOT_OK(builder.Append(ptr, length)); + } + return Status::OK(); + } +}; + +// DictionaryHashHelper implementation for FixedSizeBinaryType +template +struct DictionaryHashHelper> { + using Builder = typename TypeTraits::BuilderType; + using Scalar = typename DictionaryScalar::type; + + static Scalar GetDictionaryValue(const Builder& builder, int64_t index) { + return builder.GetValue(index); + } + + static int64_t HashValue(const Scalar& value, int byte_width) { + return HashUtil::Hash(value, byte_width, 0); + } + + static bool SlotDifferent(const Builder& builder, int64_t index, const uint8_t* value) { + const int32_t width = builder.byte_width(); + const uint8_t* other_value = builder.GetValue(index); + return memcmp(value, other_value, width) != 0; + } + + static Status AppendValue(Builder& builder, const Scalar& value) { + return builder.Append(value); + } + + static Status AppendArray(Builder& builder, const Array& in_array) { + const auto& array = checked_cast(in_array); + for (uint64_t index = 0, limit = array.length(); index < limit; ++index) { + const Scalar value = array.GetValue(index); + RETURN_NOT_OK(builder.Append(value)); + } + return Status::OK(); + } +}; + +} // namespace template DictionaryBuilder::DictionaryBuilder(const std::shared_ptr& type, MemoryPool* pool) - : ArrayBuilder(type, pool), byte_width_(-1), values_builder_(pool) { - DCHECK_EQ(T::type_id, type->id()) << "inconsistent type passed to DictionaryBuilder"; -} + : ArrayBuilder(type, pool), + hash_slots_(nullptr), + dict_builder_(type, pool), + overflow_dict_builder_(type, pool), + values_builder_(pool), + byte_width_(-1) {} DictionaryBuilder::DictionaryBuilder(const std::shared_ptr& type, MemoryPool* pool) - : ArrayBuilder(type, pool), values_builder_(pool) { - DCHECK_EQ(Type::NA, type->id()) << "inconsistent type passed to DictionaryBuilder"; -} + : ArrayBuilder(type, pool), values_builder_(pool) {} template <> DictionaryBuilder::DictionaryBuilder( const std::shared_ptr& type, MemoryPool* pool) : ArrayBuilder(type, pool), + hash_slots_(nullptr), + dict_builder_(type, pool), + overflow_dict_builder_(type, pool), + values_builder_(pool), byte_width_(checked_cast(*type).byte_width()) {} template void DictionaryBuilder::Reset() { - ArrayBuilder::Reset(); + dict_builder_.Reset(); + overflow_dict_builder_.Reset(); values_builder_.Reset(); - memo_table_.reset(); - delta_offset_ = 0; } template @@ -797,10 +916,14 @@ Status DictionaryBuilder::Resize(int64_t capacity) { } if (capacity_ == 0) { - // Initialize hash table - // XXX should we let the user pass additional size heuristics? - memo_table_.reset(new MemoTableImpl(0)); - delta_offset_ = 0; + // Fill the initial hash table + RETURN_NOT_OK(internal::NewHashTable(kInitialHashTableSize, pool_, &hash_table_)); + hash_slots_ = reinterpret_cast(hash_table_->mutable_data()); + hash_table_size_ = kInitialHashTableSize; + entry_id_offset_ = 0; + mod_bitmask_ = kInitialHashTableSize - 1; + hash_table_load_threshold_ = + static_cast(static_cast(capacity) * kMaxHashTableLoad); } RETURN_NOT_OK(values_builder_.Resize(capacity)); return ArrayBuilder::Resize(capacity); @@ -814,12 +937,67 @@ Status DictionaryBuilder::Resize(int64_t capacity) { return ArrayBuilder::Resize(capacity); } +template +int64_t DictionaryBuilder::HashValue(const Scalar& value) { + return DictionaryHashHelper::HashValue(value, byte_width_); +} + +template +typename DictionaryBuilder::Scalar DictionaryBuilder::GetDictionaryValue( + typename TypeTraits::BuilderType& dictionary_builder, int64_t index) { + return DictionaryHashHelper::GetDictionaryValue(dictionary_builder, index); +} + +template +bool DictionaryBuilder::SlotDifferent(hash_slot_t index, const Scalar& value) { + DCHECK_GE(index, 0); + if (index >= entry_id_offset_) { + // Lookup delta dictionary + DCHECK_LT(index - entry_id_offset_, dict_builder_.length()); + return DictionaryHashHelper::SlotDifferent( + dict_builder_, static_cast(index - entry_id_offset_), value); + } else { + DCHECK_LT(index, overflow_dict_builder_.length()); + return DictionaryHashHelper::SlotDifferent(overflow_dict_builder_, + static_cast(index), value); + } +} + +template +Status DictionaryBuilder::AppendDictionary(const Scalar& value) { + return DictionaryHashHelper::AppendValue(dict_builder_, value); +} + template Status DictionaryBuilder::Append(const Scalar& value) { RETURN_NOT_OK(Reserve(1)); + // Based on DictEncoder::Put + int64_t j = HashValue(value) & mod_bitmask_; + hash_slot_t index = hash_slots_[j]; + + // Find an empty slot + while (kHashSlotEmpty != index && SlotDifferent(index, value)) { + // Linear probing + ++j; + if (j == hash_table_size_) { + j = 0; + } + index = hash_slots_[j]; + } + + if (index == kHashSlotEmpty) { + // Not in the hash table, so we insert it now + index = static_cast(dict_builder_.length() + entry_id_offset_); + hash_slots_[j] = index; + RETURN_NOT_OK(AppendDictionary(value)); + + if (ARROW_PREDICT_FALSE(static_cast(dict_builder_.length()) > + hash_table_load_threshold_)) { + RETURN_NOT_OK(DoubleTableSize()); + } + } - auto memo_index = memo_table_->GetOrInsert(value); - RETURN_NOT_OK(values_builder_.Append(memo_index)); + RETURN_NOT_OK(values_builder_.Append(index)); return Status::OK(); } @@ -851,24 +1029,48 @@ Status DictionaryBuilder::AppendArray(const Array& array) { return Status::OK(); } +template <> +Status DictionaryBuilder::AppendArray(const Array& array) { + if (!type_->Equals(*array.type())) { + return Status::Invalid("Cannot append FixedSizeBinary array with non-matching type"); + } + + const auto& numeric_array = checked_cast(array); + for (int64_t i = 0; i < array.length(); i++) { + if (array.IsNull(i)) { + RETURN_NOT_OK(AppendNull()); + } else { + RETURN_NOT_OK(Append(numeric_array.Value(i))); + } + } + return Status::OK(); +} + template -Status DictionaryBuilder::FinishInternal(std::shared_ptr* out) { - // Finalize indices array - RETURN_NOT_OK(values_builder_.FinishInternal(out)); +Status DictionaryBuilder::DoubleTableSize() { +#define INNER_LOOP \ + int64_t j = HashValue(GetDictionaryValue(dict_builder_, index)) & new_mod_bitmask + + DOUBLE_TABLE_SIZE(, INNER_LOOP); + + return Status::OK(); +} - // Generate dictionary array from hash table contents +template +Status DictionaryBuilder::FinishInternal(std::shared_ptr* out) { std::shared_ptr dictionary; - std::shared_ptr dictionary_data; + entry_id_offset_ += dict_builder_.length(); + RETURN_NOT_OK(dict_builder_.Finish(&dictionary)); - RETURN_NOT_OK(internal::DictionaryTraits::GetDictionaryArrayData( - pool_, type_, *memo_table_, delta_offset_, &dictionary_data)); - dictionary = MakeArray(dictionary_data); + // Store current dict entries for further uses of this DictionaryBuilder + RETURN_NOT_OK( + DictionaryHashHelper::AppendArray(overflow_dict_builder_, *dictionary)); + DCHECK_EQ(entry_id_offset_, overflow_dict_builder_.length()); - // Set type of array data to the right dictionary type + RETURN_NOT_OK(values_builder_.FinishInternal(out)); (*out)->type = std::make_shared((*out)->type, dictionary); - // Update internals for further uses of this DictionaryBuilder - delta_offset_ = memo_table_->size(); + dict_builder_.Reset(); values_builder_.Reset(); return Status::OK(); @@ -887,42 +1089,26 @@ Status DictionaryBuilder::FinishInternal(std::shared_ptr* o // StringType and BinaryType specializations // -#define BINARY_DICTIONARY_SPECIALIZATIONS(Type) \ - \ - template <> \ - Status DictionaryBuilder::AppendArray(const Array& array) { \ - using ArrayType = typename TypeTraits::ArrayType; \ - const ArrayType& binary_array = checked_cast(array); \ - for (int64_t i = 0; i < array.length(); i++) { \ - if (array.IsNull(i)) { \ - RETURN_NOT_OK(AppendNull()); \ - } else { \ - RETURN_NOT_OK(Append(binary_array.GetView(i))); \ - } \ - } \ - return Status::OK(); \ +#define BINARY_DICTIONARY_SPECIALIZATIONS(Type) \ + \ + template <> \ + Status DictionaryBuilder::AppendArray(const Array& array) { \ + const BinaryArray& binary_array = checked_cast(array); \ + WrappedBinary value(nullptr, 0); \ + for (int64_t i = 0; i < array.length(); i++) { \ + if (array.IsNull(i)) { \ + RETURN_NOT_OK(AppendNull()); \ + } else { \ + value.ptr_ = binary_array.GetValue(i, &value.length_); \ + RETURN_NOT_OK(Append(value)); \ + } \ + } \ + return Status::OK(); \ } BINARY_DICTIONARY_SPECIALIZATIONS(StringType); BINARY_DICTIONARY_SPECIALIZATIONS(BinaryType); -template <> -Status DictionaryBuilder::AppendArray(const Array& array) { - if (!type_->Equals(*array.type())) { - return Status::Invalid("Cannot append FixedSizeBinary array with non-matching type"); - } - - const auto& typed_array = checked_cast(array); - for (int64_t i = 0; i < array.length(); i++) { - if (array.IsNull(i)) { - RETURN_NOT_OK(AppendNull()); - } else { - RETURN_NOT_OK(Append(typed_array.GetValue(i))); - } - } - return Status::OK(); -} - template class DictionaryBuilder; template class DictionaryBuilder; template class DictionaryBuilder; @@ -1130,19 +1316,6 @@ const uint8_t* BinaryBuilder::GetValue(int64_t i, int32_t* out_length) const { return value_data_builder_.data() + offset; } -util::string_view BinaryBuilder::GetView(int64_t i) const { - const int32_t* offsets = offsets_builder_.data(); - int32_t offset = offsets[i]; - int32_t value_length; - if (i == (length_ - 1)) { - value_length = static_cast(value_data_builder_.length()) - offset; - } else { - value_length = offsets[i + 1] - offset; - } - return util::string_view( - reinterpret_cast(value_data_builder_.data() + offset), value_length); -} - StringBuilder::StringBuilder(MemoryPool* pool) : BinaryBuilder(utf8(), pool) {} Status StringBuilder::AppendValues(const std::vector& values, @@ -1241,12 +1414,6 @@ FixedSizeBinaryBuilder::FixedSizeBinaryBuilder(const std::shared_ptr& byte_width_(checked_cast(*type).byte_width()), byte_builder_(pool) {} -#ifndef NDEBUG -void FixedSizeBinaryBuilder::CheckValueSize(int64_t size) { - DCHECK_EQ(size, byte_width_) << "Appending wrong size to FixedSizeBinaryBuilder"; -} -#endif - Status FixedSizeBinaryBuilder::AppendValues(const uint8_t* data, int64_t length, const uint8_t* valid_bytes) { RETURN_NOT_OK(Reserve(length)); @@ -1254,6 +1421,10 @@ Status FixedSizeBinaryBuilder::AppendValues(const uint8_t* data, int64_t length, return byte_builder_.Append(data, length * byte_width_); } +Status FixedSizeBinaryBuilder::Append(const std::string& value) { + return Append(reinterpret_cast(value.c_str())); +} + Status FixedSizeBinaryBuilder::AppendNull() { RETURN_NOT_OK(Reserve(1)); UnsafeAppendToBitmap(false); @@ -1286,12 +1457,6 @@ const uint8_t* FixedSizeBinaryBuilder::GetValue(int64_t i) const { return data_ptr + i * byte_width_; } -util::string_view FixedSizeBinaryBuilder::GetView(int64_t i) const { - const uint8_t* data_ptr = byte_builder_.data(); - return util::string_view(reinterpret_cast(data_ptr + i * byte_width_), - byte_width_); -} - // ---------------------------------------------------------------------- // Struct diff --git a/cpp/src/arrow/compute/kernels/hash.cc b/cpp/src/arrow/compute/kernels/hash.cc index c057ea5736139..81801e2743b04 100644 --- a/cpp/src/arrow/compute/kernels/hash.cc +++ b/cpp/src/arrow/compute/kernels/hash.cc @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -38,24 +39,32 @@ #include "arrow/type_traits.h" #include "arrow/util/bit-util.h" #include "arrow/util/checked_cast.h" -#include "arrow/util/hashing.h" +#include "arrow/util/hash-util.h" +#include "arrow/util/hash.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" -#include "arrow/util/string_view.h" -#include "arrow/visitor_inline.h" namespace arrow { class MemoryPool; using internal::checked_cast; -using internal::DictionaryTraits; -using internal::HashTraits; namespace compute { +// TODO(wesm): Enable top-level dispatch to SSE4 hashing if it is enabled +#if defined(__GNUC__) && defined(__linux__) && defined(ARROW_USE_ARMCE) +#define HASH_MODE USE_ARMCRC +#elif defined(ARROW_USE_SSE) +#define HASH_MODE USE_SSE42 +#else +#define HASH_MODE USE_DEFAULT +#endif + namespace { +enum class SIMDMode : char { NOSIMD, SSE4, AVX2 }; + #define CHECK_IMPLEMENTED(KERNEL, FUNCNAME, TYPE) \ if (!KERNEL) { \ std::stringstream ss; \ @@ -63,213 +72,754 @@ namespace { return Status::NotImplemented(ss.str()); \ } -// ---------------------------------------------------------------------- -// Unique implementation +// This is a slight design concession -- some hash actions have the possibility +// of failure. Rather than introduce extra error checking into all actions, we +// will raise an internal exception so that only the actions where errors can +// occur will experience the extra overhead +class HashException : public std::exception { + public: + explicit HashException(const std::string& msg, StatusCode code = StatusCode::Invalid) + : msg_(msg), code_(code) {} + + ~HashException() throw() override {} + + const char* what() const throw() override; + + StatusCode code() const { return code_; } -class UniqueAction { + private: + std::string msg_; + StatusCode code_; +}; + +const char* HashException::what() const throw() { return msg_.c_str(); } + +class HashTable { public: - UniqueAction(const std::shared_ptr& type, MemoryPool* pool) {} + HashTable(const std::shared_ptr& type, MemoryPool* pool) + : type_(type), + pool_(pool), + initialized_(false), + hash_table_(nullptr), + hash_slots_(nullptr), + hash_table_size_(0), + mod_bitmask_(0) {} - Status Reset() { return Status::OK(); } + virtual ~HashTable() {} - Status Reserve(const int64_t length) { return Status::OK(); } + virtual Status Append(const ArrayData& input) = 0; + virtual Status Flush(Datum* out) = 0; + virtual Status GetDictionary(std::shared_ptr* out) = 0; - void ObserveNull() {} + protected: + Status Init(int64_t elements); - template - void ObserveFound(Index index) {} + std::shared_ptr type_; + MemoryPool* pool_; + bool initialized_; + + // The hash table contains integer indices that reference the set of observed + // distinct values + std::shared_ptr hash_table_; + hash_slot_t* hash_slots_; + + /// Size of the table. Must be a power of 2. + int64_t hash_table_size_; - template - void ObserveNotFound(Index index) {} + /// Size at which we decide to resize + int64_t hash_table_load_threshold_; - Status Flush(Datum* out) { return Status::OK(); } + // Store hash_table_size_ - 1, so that j & mod_bitmask_ is equivalent to j % + // hash_table_size_, but uses far fewer CPU cycles + int64_t mod_bitmask_; }; +Status HashTable::Init(int64_t elements) { + DCHECK_EQ(elements, BitUtil::NextPower2(elements)); + RETURN_NOT_OK(internal::NewHashTable(elements, pool_, &hash_table_)); + hash_slots_ = reinterpret_cast(hash_table_->mutable_data()); + hash_table_size_ = elements; + hash_table_load_threshold_ = + static_cast(static_cast(elements) * kMaxHashTableLoad); + mod_bitmask_ = elements - 1; + initialized_ = true; + return Status::OK(); +} + +template +class HashTableKernel : public HashTable {}; + +// Types of hash actions +// +// unique: append to dictionary when not found, no-op with slot +// dictionary-encode: append to dictionary when not found, append slot # +// match: raise or set null when not found, otherwise append slot # +// isin: set false when not found, otherwise true +// value counts: append to dictionary when not found, increment count for slot + +template +class HashDictionary {}; + // ---------------------------------------------------------------------- -// Dictionary encode implementation +// Hash table pass for nulls -class DictEncodeAction { +template +class HashTableKernel> : public HashTable { public: - DictEncodeAction(const std::shared_ptr& type, MemoryPool* pool) - : indices_builder_(pool) {} + using HashTable::HashTable; - Status Reset() { - indices_builder_.Reset(); + Status Init() { + // No-op, do not even need to initialize hash table return Status::OK(); } - Status Reserve(const int64_t length) { return indices_builder_.Reserve(length); } - - void ObserveNull() { indices_builder_.UnsafeAppendNull(); } + Status Append(const ArrayData& arr) override { + if (!initialized_) { + RETURN_NOT_OK(Init()); + } + auto action = checked_cast(this); + RETURN_NOT_OK(action->Reserve(arr.length)); + for (int64_t i = 0; i < arr.length; ++i) { + action->ObserveNull(); + } + return Status::OK(); + } - template - void ObserveFound(Index index) { - indices_builder_.UnsafeAppend(index); + Status GetDictionary(std::shared_ptr* out) override { + // TODO(wesm): handle null being a valid dictionary value + auto null_array = std::make_shared(0); + *out = null_array->data(); + return Status::OK(); } +}; + +// ---------------------------------------------------------------------- +// Hash table pass for primitive types - template - void ObserveNotFound(Index index) { - return ObserveFound(index); +template +struct HashDictionary> { + using T = typename Type::c_type; + + explicit HashDictionary(MemoryPool* pool) : pool(pool), size(0), capacity(0) {} + + Status Init() { + this->size = 0; + RETURN_NOT_OK(AllocateResizableBuffer(this->pool, 0, &this->buffer)); + return Resize(kInitialHashTableSize); } - Status Flush(Datum* out) { - std::shared_ptr result; - RETURN_NOT_OK(indices_builder_.FinishInternal(&result)); - out->value = std::move(result); + Status DoubleSize() { return Resize(this->size * 2); } + + Status Resize(const int64_t elements) { + RETURN_NOT_OK(this->buffer->Resize(elements * sizeof(T))); + + this->capacity = elements; + this->values = reinterpret_cast(this->buffer->mutable_data()); return Status::OK(); } - private: - Int32Builder indices_builder_; + MemoryPool* pool; + std::shared_ptr buffer; + T* values; + int64_t size; + int64_t capacity; }; -// ---------------------------------------------------------------------- -// Base class for all hash kernel implementations +#define GENERIC_HASH_PASS(HASH_INNER_LOOP) \ + if (arr.null_count != 0) { \ + internal::BitmapReader valid_reader(arr.buffers[0]->data(), arr.offset, arr.length); \ + for (int64_t i = 0; i < arr.length; ++i) { \ + const bool is_null = valid_reader.IsNotSet(); \ + valid_reader.Next(); \ + \ + if (is_null) { \ + action->ObserveNull(); \ + continue; \ + } \ + \ + HASH_INNER_LOOP(); \ + } \ + } else { \ + for (int64_t i = 0; i < arr.length; ++i) { \ + HASH_INNER_LOOP(); \ + } \ + } -class HashKernelImpl : public HashKernel { +template +class HashTableKernel< + Type, Action, + typename std::enable_if::value && !is_8bit_int::value>::type> + : public HashTable { public: - Status Call(FunctionContext* ctx, const Datum& input, Datum* out) override { - DCHECK_EQ(Datum::ARRAY, input.kind()); - RETURN_NOT_OK(Append(ctx, *input.array())); - return Flush(out); + using T = typename Type::c_type; + + HashTableKernel(const std::shared_ptr& type, MemoryPool* pool) + : HashTable(type, pool), dict_(pool) {} + + Status Init() { + RETURN_NOT_OK(dict_.Init()); + return HashTable::Init(kInitialHashTableSize); } - Status Append(FunctionContext* ctx, const ArrayData& input) override { - std::lock_guard guard(lock_); - return Append(input); + Status Append(const ArrayData& arr) override { + if (!initialized_) { + RETURN_NOT_OK(Init()); + } + + const T* values = GetValues(arr, 1); + auto action = checked_cast(this); + + RETURN_NOT_OK(action->Reserve(arr.length)); + +#define HASH_INNER_LOOP() \ + const T value = values[i]; \ + int64_t j = HashValue(value) & mod_bitmask_; \ + hash_slot_t slot = hash_slots_[j]; \ + \ + while (kHashSlotEmpty != slot && dict_.values[slot] != value) { \ + ++j; \ + if (ARROW_PREDICT_FALSE(j == hash_table_size_)) { \ + j = 0; \ + } \ + slot = hash_slots_[j]; \ + } \ + \ + if (slot == kHashSlotEmpty) { \ + if (!Action::allow_expand) { \ + throw HashException("Encountered new dictionary value"); \ + } \ + \ + slot = static_cast(dict_.size); \ + hash_slots_[j] = slot; \ + dict_.values[dict_.size++] = value; \ + \ + action->ObserveNotFound(slot); \ + \ + if (ARROW_PREDICT_FALSE(dict_.size > hash_table_load_threshold_)) { \ + RETURN_NOT_OK(action->DoubleSize()); \ + } \ + } else { \ + action->ObserveFound(slot); \ } - virtual Status Append(const ArrayData& arr) = 0; + GENERIC_HASH_PASS(HASH_INNER_LOOP); + +#undef HASH_INNER_LOOP + + return Status::OK(); + } + + Status GetDictionary(std::shared_ptr* out) override { + // TODO(wesm): handle null being in the dictionary + auto dict_data = dict_.buffer; + RETURN_NOT_OK(dict_data->Resize(dict_.size * sizeof(T), false)); + dict_data->ZeroPadding(); + + *out = ArrayData::Make(type_, dict_.size, {nullptr, dict_data}, 0); + return Status::OK(); + } protected: - std::mutex lock_; + int64_t HashValue(const T& value) const { + // TODO(wesm): Use faster hash function for C types + return HashUtil::Hash(&value, sizeof(T), 0); + } + + Status DoubleTableSize() { +#define PRIMITIVE_INNER_LOOP \ + const T value = dict_.values[index]; \ + int64_t j = HashValue(value) & new_mod_bitmask; + + DOUBLE_TABLE_SIZE(, PRIMITIVE_INNER_LOOP); + +#undef PRIMITIVE_INNER_LOOP + + return dict_.Resize(hash_table_size_); + } + + HashDictionary dict_; }; // ---------------------------------------------------------------------- -// Base class for all "regular" hash kernel implementations -// (NullType has a separate implementation) +// Hash table for boolean types -template -class RegularHashKernelImpl : public HashKernelImpl { +template +class HashTableKernel> : public HashTable { public: - RegularHashKernelImpl(const std::shared_ptr& type, MemoryPool* pool) - : pool_(pool), type_(type), action_(type, pool) {} - - Status Reset() override { - memo_table_.reset(new MemoTable(0)); - return action_.Reset(); + HashTableKernel(const std::shared_ptr& type, MemoryPool* pool) + : HashTable(type, pool) { + std::fill(table_, table_ + 2, kHashSlotEmpty); } Status Append(const ArrayData& arr) override { - RETURN_NOT_OK(action_.Reserve(arr.length)); - return ArrayDataVisitor::Visit(arr, this); + auto action = checked_cast(this); + + RETURN_NOT_OK(action->Reserve(arr.length)); + + internal::BitmapReader value_reader(arr.buffers[1]->data(), arr.offset, arr.length); + +#define HASH_INNER_LOOP() \ + if (slot == kHashSlotEmpty) { \ + if (!Action::allow_expand) { \ + throw HashException("Encountered new dictionary value"); \ + } \ + table_[j] = slot = static_cast(dict_.size()); \ + dict_.push_back(value); \ + action->ObserveNotFound(slot); \ + } else { \ + action->ObserveFound(slot); \ } - Status Flush(Datum* out) override { return action_.Flush(out); } + if (arr.null_count != 0) { + internal::BitmapReader valid_reader(arr.buffers[0]->data(), arr.offset, arr.length); + for (int64_t i = 0; i < arr.length; ++i) { + const bool is_null = valid_reader.IsNotSet(); + valid_reader.Next(); + if (is_null) { + value_reader.Next(); + action->ObserveNull(); + continue; + } + const bool value = value_reader.IsSet(); + value_reader.Next(); + const int j = value ? 1 : 0; + hash_slot_t slot = table_[j]; + HASH_INNER_LOOP(); + } + } else { + for (int64_t i = 0; i < arr.length; ++i) { + const bool value = value_reader.IsSet(); + value_reader.Next(); + const int j = value ? 1 : 0; + hash_slot_t slot = table_[j]; + HASH_INNER_LOOP(); + } + } + +#undef HASH_INNER_LOOP + + return Status::OK(); + } Status GetDictionary(std::shared_ptr* out) override { - return DictionaryTraits::GetDictionaryArrayData(pool_, type_, *memo_table_, - 0 /* start_offset */, out); + BooleanBuilder builder(pool_); + for (const bool value : dict_) { + RETURN_NOT_OK(builder.Append(value)); + } + return builder.FinishInternal(out); } - Status VisitNull() { - action_.ObserveNull(); + private: + hash_slot_t table_[2]; + std::vector dict_; +}; + +// ---------------------------------------------------------------------- +// Hash table pass for variable-length binary types + +template +class HashTableKernel> : public HashTable { + public: + HashTableKernel(const std::shared_ptr& type, MemoryPool* pool) + : HashTable(type, pool), dict_offsets_(pool), dict_data_(pool), dict_size_(0) {} + + Status Init() { + RETURN_NOT_OK(dict_offsets_.Resize(kInitialHashTableSize)); + + // We append the end offset after each append to the dictionary, so this + // sets the initial condition for the length-0 case + // + // initial offsets (dict size == 0): 0 + // after 1st dict entry of length 3: 0 3 + // after 2nd dict entry of length 4: 0 3 7 + RETURN_NOT_OK(dict_offsets_.Append(0)); + return HashTable::Init(kInitialHashTableSize); + } + + Status Append(const ArrayData& arr) override { + constexpr uint8_t empty_value = 0; + if (!initialized_) { + RETURN_NOT_OK(Init()); + } + + const int32_t* offsets = GetValues(arr, 1); + const uint8_t* data; + if (arr.buffers[2].get() == nullptr) { + data = &empty_value; + } else { + data = GetValues(arr, 2); + } + + auto action = checked_cast(this); + RETURN_NOT_OK(action->Reserve(arr.length)); + +#define HASH_INNER_LOOP() \ + const int32_t position = offsets[i]; \ + const int32_t length = offsets[i + 1] - position; \ + const uint8_t* value = data + position; \ + \ + int64_t j = HashValue(value, length) & mod_bitmask_; \ + hash_slot_t slot = hash_slots_[j]; \ + \ + const int32_t* dict_offsets = dict_offsets_.data(); \ + const uint8_t* dict_data = dict_data_.data(); \ + while (kHashSlotEmpty != slot && \ + !((dict_offsets[slot + 1] - dict_offsets[slot]) == length && \ + 0 == memcmp(value, dict_data + dict_offsets[slot], length))) { \ + ++j; \ + if (ARROW_PREDICT_FALSE(j == hash_table_size_)) { \ + j = 0; \ + } \ + slot = hash_slots_[j]; \ + } \ + \ + if (slot == kHashSlotEmpty) { \ + if (!Action::allow_expand) { \ + throw HashException("Encountered new dictionary value"); \ + } \ + \ + slot = dict_size_++; \ + hash_slots_[j] = slot; \ + \ + RETURN_NOT_OK(dict_data_.Append(value, length)); \ + RETURN_NOT_OK(dict_offsets_.Append(static_cast(dict_data_.length()))); \ + \ + action->ObserveNotFound(slot); \ + \ + if (ARROW_PREDICT_FALSE(dict_size_ > hash_table_load_threshold_)) { \ + RETURN_NOT_OK(action->DoubleSize()); \ + } \ + } else { \ + action->ObserveFound(slot); \ + } + + GENERIC_HASH_PASS(HASH_INNER_LOOP); + +#undef HASH_INNER_LOOP + return Status::OK(); } - Status VisitValue(const Scalar& value) { - auto on_found = [this](int32_t memo_index) { action_.ObserveFound(memo_index); }; - auto on_not_found = [this](int32_t memo_index) { - action_.ObserveNotFound(memo_index); - }; - memo_table_->GetOrInsert(value, on_found, on_not_found); + Status GetDictionary(std::shared_ptr* out) override { + // TODO(wesm): handle null being in the dictionary + BufferVector buffers = {nullptr, nullptr, nullptr}; + + RETURN_NOT_OK(dict_offsets_.Finish(&buffers[1])); + RETURN_NOT_OK(dict_data_.Finish(&buffers[2])); + + *out = ArrayData::Make(type_, dict_size_, std::move(buffers), 0); return Status::OK(); } protected: - using MemoTable = typename HashTraits::MemoTableType; + int64_t HashValue(const uint8_t* data, int32_t length) const { + return HashUtil::Hash(data, length, 0); + } - MemoryPool* pool_; - std::shared_ptr type_; - Action action_; - std::unique_ptr memo_table_; + Status DoubleTableSize() { +#define VARBYTES_SETUP \ + const int32_t* dict_offsets = dict_offsets_.data(); \ + const uint8_t* dict_data = dict_data_.data() + +#define VARBYTES_COMPUTE_HASH \ + const int32_t length = dict_offsets[index + 1] - dict_offsets[index]; \ + const uint8_t* value = dict_data + dict_offsets[index]; \ + int64_t j = HashValue(value, length) & new_mod_bitmask + + DOUBLE_TABLE_SIZE(VARBYTES_SETUP, VARBYTES_COMPUTE_HASH); + +#undef VARBYTES_SETUP +#undef VARBYTES_COMPUTE_HASH + + return Status::OK(); + } + + TypedBufferBuilder dict_offsets_; + TypedBufferBuilder dict_data_; + int32_t dict_size_; }; // ---------------------------------------------------------------------- -// Hash kernel implementation for nulls +// Hash table pass for fixed size binary types -template -class NullHashKernelImpl : public HashKernelImpl { +template +class HashTableKernel> + : public HashTable { public: - NullHashKernelImpl(const std::shared_ptr& type, MemoryPool* pool) - : pool_(pool), type_(type), action_(type, pool) {} + HashTableKernel(const std::shared_ptr& type, MemoryPool* pool) + : HashTable(type, pool), dict_data_(pool), dict_size_(0) { + const auto& fw_type = checked_cast(*type); + byte_width_ = fw_type.bit_width() / 8; + } - Status Reset() override { return action_.Reset(); } + Status Init() { + RETURN_NOT_OK(dict_data_.Resize(kInitialHashTableSize * byte_width_)); + return HashTable::Init(kInitialHashTableSize); + } Status Append(const ArrayData& arr) override { - RETURN_NOT_OK(action_.Reserve(arr.length)); - for (int64_t i = 0; i < arr.length; ++i) { - action_.ObserveNull(); + if (!initialized_) { + RETURN_NOT_OK(Init()); } - return Status::OK(); + + const uint8_t* data = GetValues(arr, 1); + + auto action = checked_cast(this); + RETURN_NOT_OK(action->Reserve(arr.length)); + +#define HASH_INNER_LOOP() \ + const uint8_t* value = data + i * byte_width_; \ + int64_t j = HashValue(value) & mod_bitmask_; \ + hash_slot_t slot = hash_slots_[j]; \ + \ + const uint8_t* dict_data = dict_data_.data(); \ + while (kHashSlotEmpty != slot && \ + !(0 == memcmp(value, dict_data + slot * byte_width_, byte_width_))) { \ + ++j; \ + if (ARROW_PREDICT_FALSE(j == hash_table_size_)) { \ + j = 0; \ + } \ + slot = hash_slots_[j]; \ + } \ + \ + if (slot == kHashSlotEmpty) { \ + if (!Action::allow_expand) { \ + throw HashException("Encountered new dictionary value"); \ + } \ + \ + slot = dict_size_++; \ + hash_slots_[j] = slot; \ + \ + RETURN_NOT_OK(dict_data_.Append(value, byte_width_)); \ + \ + action->ObserveNotFound(slot); \ + \ + if (ARROW_PREDICT_FALSE(dict_size_ > hash_table_load_threshold_)) { \ + RETURN_NOT_OK(action->DoubleSize()); \ + } \ + } else { \ + action->ObserveFound(slot); \ } - Status Flush(Datum* out) override { return action_.Flush(out); } + GENERIC_HASH_PASS(HASH_INNER_LOOP); + +#undef HASH_INNER_LOOP + + return Status::OK(); + } Status GetDictionary(std::shared_ptr* out) override { - // TODO(wesm): handle null being a valid dictionary value - auto null_array = std::make_shared(0); - *out = null_array->data(); + // TODO(wesm): handle null being in the dictionary + BufferVector buffers = {nullptr, nullptr}; + RETURN_NOT_OK(dict_data_.Finish(&buffers[1])); + + *out = ArrayData::Make(type_, dict_size_, std::move(buffers), 0); return Status::OK(); } protected: - MemoryPool* pool_; - std::shared_ptr type_; - Action action_; + int64_t HashValue(const uint8_t* data) const { + return HashUtil::Hash(data, byte_width_, 0); + } + + Status DoubleTableSize() { +#define FIXED_BYTES_SETUP const uint8_t* dict_data = dict_data_.data() + +#define FIXED_BYTES_COMPUTE_HASH \ + int64_t j = HashValue(dict_data + index * byte_width_) & new_mod_bitmask + + DOUBLE_TABLE_SIZE(FIXED_BYTES_SETUP, FIXED_BYTES_COMPUTE_HASH); + +#undef FIXED_BYTES_SETUP +#undef FIXED_BYTES_COMPUTE_HASH + + return Status::OK(); + } + + int32_t byte_width_; + TypedBufferBuilder dict_data_; + int32_t dict_size_; }; // ---------------------------------------------------------------------- -// Kernel wrapper for generic hash table kernels +// Hash table pass for uint8 and int8 -template -struct HashKernelTraits {}; +template +inline int Hash8Bit(const T val) { + return 0; +} -template -struct HashKernelTraits> { - using HashKernelImpl = NullHashKernelImpl; -}; +template <> +inline int Hash8Bit(const uint8_t val) { + return val; +} + +template <> +inline int Hash8Bit(const int8_t val) { + return val + 128; +} template -struct HashKernelTraits> { - using HashKernelImpl = RegularHashKernelImpl; +class HashTableKernel> : public HashTable { + public: + using T = typename Type::c_type; + + HashTableKernel(const std::shared_ptr& type, MemoryPool* pool) + : HashTable(type, pool) { + std::fill(table_, table_ + 256, kHashSlotEmpty); + } + + Status Append(const ArrayData& arr) override { + const T* values = GetValues(arr, 1); + auto action = checked_cast(this); + RETURN_NOT_OK(action->Reserve(arr.length)); + +#define HASH_INNER_LOOP() \ + const T value = values[i]; \ + const int hash = Hash8Bit(value); \ + hash_slot_t slot = table_[hash]; \ + \ + if (slot == kHashSlotEmpty) { \ + if (!Action::allow_expand) { \ + throw HashException("Encountered new dictionary value"); \ + } \ + \ + slot = static_cast(dict_.size()); \ + table_[hash] = slot; \ + dict_.push_back(value); \ + action->ObserveNotFound(slot); \ + } else { \ + action->ObserveFound(slot); \ + } + + GENERIC_HASH_PASS(HASH_INNER_LOOP); + +#undef HASH_INNER_LOOP + + return Status::OK(); + } + + Status GetDictionary(std::shared_ptr* out) override { + using BuilderType = typename TypeTraits::BuilderType; + BuilderType builder(pool_); + + for (const T value : dict_) { + RETURN_NOT_OK(builder.Append(value)); + } + + return builder.FinishInternal(out); + } + + private: + hash_slot_t table_[256]; + std::vector dict_; }; -template -struct HashKernelTraits> { - using HashKernelImpl = RegularHashKernelImpl; +// ---------------------------------------------------------------------- +// Unique implementation + +template +class UniqueImpl : public HashTableKernel> { + public: + static constexpr bool allow_expand = true; + using Base = HashTableKernel>; + using Base::Base; + + Status Reserve(const int64_t length) { return Status::OK(); } + + void ObserveFound(const hash_slot_t slot) {} + void ObserveNull() {} + void ObserveNotFound(const hash_slot_t slot) {} + + Status DoubleSize() { return Base::DoubleTableSize(); } + + Status Append(const ArrayData& input) override { return Base::Append(input); } + + Status Flush(Datum* out) override { + // No-op + return Status::OK(); + } }; -template -struct HashKernelTraits> { - using HashKernelImpl = RegularHashKernelImpl; +// ---------------------------------------------------------------------- +// Dictionary encode implementation + +template +class DictEncodeImpl : public HashTableKernel> { + public: + static constexpr bool allow_expand = true; + using Base = HashTableKernel; + + DictEncodeImpl(const std::shared_ptr& type, MemoryPool* pool) + : Base(type, pool), indices_builder_(pool) {} + + Status Reserve(const int64_t length) { return indices_builder_.Reserve(length); } + + void ObserveNull() { indices_builder_.UnsafeAppendToBitmap(false); } + + void ObserveFound(const hash_slot_t slot) { indices_builder_.UnsafeAppend(slot); } + + void ObserveNotFound(const hash_slot_t slot) { return ObserveFound(slot); } + + Status DoubleSize() { return Base::DoubleTableSize(); } + + Status Flush(Datum* out) override { + std::shared_ptr result; + RETURN_NOT_OK(indices_builder_.FinishInternal(&result)); + out->value = std::move(result); + return Status::OK(); + } + + using Base::Append; + + private: + Int32Builder indices_builder_; }; -template -struct HashKernelTraits> { - using HashKernelImpl = RegularHashKernelImpl; +// ---------------------------------------------------------------------- +// Kernel wrapper for generic hash table kernels + +class HashKernelImpl : public HashKernel { + public: + explicit HashKernelImpl(std::unique_ptr hasher) + : hasher_(std::move(hasher)) {} + + Status Call(FunctionContext* ctx, const Datum& input, Datum* out) override { + DCHECK_EQ(Datum::ARRAY, input.kind()); + RETURN_NOT_OK(Append(ctx, *input.array())); + return Flush(out); + } + + Status Append(FunctionContext* ctx, const ArrayData& input) override { + std::lock_guard guard(lock_); + try { + RETURN_NOT_OK(hasher_->Append(input)); + } catch (const HashException& e) { + return Status(e.code(), e.what()); + } + return Status::OK(); + } + + Status Flush(Datum* out) override { return hasher_->Flush(out); } + + Status GetDictionary(std::shared_ptr* out) override { + return hasher_->GetDictionary(out); + } + + private: + std::mutex lock_; + std::unique_ptr hasher_; }; } // namespace Status GetUniqueKernel(FunctionContext* ctx, const std::shared_ptr& type, std::unique_ptr* out) { - std::unique_ptr kernel; + std::unique_ptr hasher; -#define UNIQUE_CASE(InType) \ - case InType::type_id: \ - kernel.reset(new typename HashKernelTraits::HashKernelImpl( \ - type, ctx->memory_pool())); \ +#define UNIQUE_CASE(InType) \ + case InType::type_id: \ + hasher.reset(new UniqueImpl(type, ctx->memory_pool())); \ break switch (type->id()) { @@ -300,22 +850,19 @@ Status GetUniqueKernel(FunctionContext* ctx, const std::shared_ptr& ty #undef UNIQUE_CASE - CHECK_IMPLEMENTED(kernel, "unique", type); - RETURN_NOT_OK(kernel->Reset()); - *out = std::move(kernel); + CHECK_IMPLEMENTED(hasher, "unique", type); + out->reset(new HashKernelImpl(std::move(hasher))); return Status::OK(); } Status GetDictionaryEncodeKernel(FunctionContext* ctx, const std::shared_ptr& type, std::unique_ptr* out) { - std::unique_ptr kernel; + std::unique_ptr hasher; -#define DICTIONARY_ENCODE_CASE(InType) \ - case InType::type_id: \ - kernel.reset(new \ - typename HashKernelTraits::HashKernelImpl( \ - type, ctx->memory_pool())); \ +#define DICTIONARY_ENCODE_CASE(InType) \ + case InType::type_id: \ + hasher.reset(new DictEncodeImpl(type, ctx->memory_pool())); \ break switch (type->id()) { @@ -346,9 +893,8 @@ Status GetDictionaryEncodeKernel(FunctionContext* ctx, #undef DICTIONARY_ENCODE_CASE - CHECK_IMPLEMENTED(kernel, "dictionary-encode", type); - RETURN_NOT_OK(kernel->Reset()); - *out = std::move(kernel); + CHECK_IMPLEMENTED(hasher, "dictionary-encode", type); + out->reset(new HashKernelImpl(std::move(hasher))); return Status::OK(); } diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index 122c551bf42e7..89229169ecdb7 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -21,6 +21,7 @@ # Headers: top level install(FILES + armce-util.h bit-stream-utils.h bit-util.h bpacking.h @@ -59,6 +60,9 @@ install(FILES windows_compatibility.h DESTINATION include/arrow/util) +# Armv8 CRC support +configure_file(config.in.cmake ${CMAKE_CURRENT_SOURCE_DIR}/my_config.h) + ####################################### # arrow_test_main ####################################### From 1cf378a413cb5c87810785349304515f8643adb3 Mon Sep 17 00:00:00 2001 From: Yuqi Gu Date: Sun, 25 Nov 2018 15:31:07 +0000 Subject: [PATCH 2/3] Rebase the patch to master Change-Id: I3044a89bae619968e340636996f014a0134f1030 Signed-off-by: Yuqi Gu --- cpp/cmake_modules/SetupCxxFlags.cmake | 6 + cpp/src/arrow/builder.cc | 347 +++-------- cpp/src/arrow/compute/kernels/hash.cc | 806 +++++--------------------- cpp/src/arrow/util/CMakeLists.txt | 3 - cpp/src/arrow/util/armce-util.h | 90 +++ cpp/src/arrow/util/hash-util.h | 97 +++- 6 files changed, 386 insertions(+), 963 deletions(-) create mode 100644 cpp/src/arrow/util/armce-util.h diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index d239d69a93d68..893ec360d3e55 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -22,6 +22,8 @@ include(CheckCXXCompilerFlag) CHECK_CXX_COMPILER_FLAG("-msse4.2" CXX_SUPPORTS_SSE4_2) # power compiler flags CHECK_CXX_COMPILER_FLAG("-maltivec" CXX_SUPPORTS_ALTIVEC) +# Arm64 compiler flags +CHECK_CXX_COMPILER_FLAG("-march=armv8-a+crc" CXX_SUPPORTS_ARMCRC) # This ensures that things like gnu++11 get passed correctly set(CMAKE_CXX_STANDARD 11) @@ -220,6 +222,10 @@ if (CXX_SUPPORTS_ALTIVEC AND ARROW_ALTIVEC) set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -maltivec") endif() +if (CXX_SUPPORTS_ARMCRC) + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -march=armv8-a+crc") +endif() + if (ARROW_USE_SIMD) add_definitions(-DARROW_USE_SIMD) endif() diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index b7a0fb5513a4e..6aa415bbed2f3 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -33,18 +33,9 @@ #include "arrow/util/bit-util.h" #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" -#include "arrow/util/hash-util.h" -#include "arrow/util/hash.h" +#include "arrow/util/hashing.h" #include "arrow/util/logging.h" -#if defined(__GNUC__) && defined(__linux__) && defined(ARROW_USE_ARMCE) -#define HASH_MODE USE_ARMCRC -#elif defined(ARROW_USE_SSE) -#define HASH_MODE USE_SSE42 -#else -#define HASH_MODE USE_DEFAULT -#endif - namespace arrow { using internal::AdaptiveIntBuilderBase; @@ -738,7 +729,7 @@ Status BooleanBuilder::AppendValues(const std::vector& values, int64_t i = 0; internal::GenerateBitsUnrolled(raw_data_, length_, length, - [values, &i]() -> bool { return values[i++]; }); + [&values, &i]() -> bool { return values[i++]; }); // this updates length_ ArrayBuilder::UnsafeAppendToBitmap(is_valid); @@ -751,7 +742,7 @@ Status BooleanBuilder::AppendValues(const std::vector& values) { int64_t i = 0; internal::GenerateBitsUnrolled(raw_data_, length_, length, - [values, &i]() -> bool { return values[i++]; }); + [&values, &i]() -> bool { return values[i++]; }); // this updates length_ ArrayBuilder::UnsafeSetNotNull(length); @@ -761,152 +752,42 @@ Status BooleanBuilder::AppendValues(const std::vector& values) { // ---------------------------------------------------------------------- // DictionaryBuilder -using internal::DictionaryScalar; -using internal::WrappedBinary; - -namespace { - -// A helper class to manage a hash table embedded in a typed Builder. -template -struct DictionaryHashHelper {}; - -// DictionaryHashHelper implementation for primitive types template -struct DictionaryHashHelper> { - using Builder = typename TypeTraits::BuilderType; - using Scalar = typename DictionaryScalar::type; - - // Get the dictionary value at the given builder index - static Scalar GetDictionaryValue(const Builder& builder, int64_t index) { - return builder.GetValue(index); - } - - // Compute the hash of a scalar value - static int64_t HashValue(const Scalar& value, int byte_width) { - return HashUtil::Hash(&value, sizeof(Scalar), 0); - } - - // Return whether the dictionary value at the given builder index is unequal to value - static bool SlotDifferent(const Builder& builder, int64_t index, const Scalar& value) { - return GetDictionaryValue(builder, index) != value; - } - - // Append a value to the builder - static Status AppendValue(Builder& builder, const Scalar& value) { - return builder.Append(value); - } - - // Append another builder's contents to the builder - static Status AppendArray(Builder& builder, const Array& in_array) { - const auto& array = checked_cast(in_array); - return builder.AppendValues(reinterpret_cast(array.values()->data()), - array.length(), nullptr); - } +class DictionaryBuilder::MemoTableImpl + : public internal::HashTraits::MemoTableType { + public: + using MemoTableType = typename internal::HashTraits::MemoTableType; + using MemoTableType::MemoTableType; }; -// DictionaryHashHelper implementation for StringType / BinaryType template -struct DictionaryHashHelper> { - using Builder = typename TypeTraits::BuilderType; - using Scalar = typename DictionaryScalar::type; - - static Scalar GetDictionaryValue(const Builder& builder, int64_t index) { - int32_t v_length; - const uint8_t* v_ptr = builder.GetValue(index, &v_length); - return WrappedBinary(v_ptr, v_length); - } - - static int64_t HashValue(const Scalar& value, int byte_width) { - return HashUtil::Hash(value.ptr_, value.length_, 0); - } - - static bool SlotDifferent(const Builder& builder, int64_t index, const Scalar& value) { - int32_t other_length; - const uint8_t* other_ptr = builder.GetValue(index, &other_length); - return value.length_ != other_length || - memcmp(value.ptr_, other_ptr, other_length) != 0; - } - - static Status AppendValue(Builder& builder, const Scalar& value) { - return builder.Append(value.ptr_, value.length_); - } - - static Status AppendArray(Builder& builder, const Array& in_array) { - const auto& array = checked_cast(in_array); - for (uint64_t index = 0, limit = array.length(); index < limit; ++index) { - int32_t length; - const uint8_t* ptr = array.GetValue(index, &length); - RETURN_NOT_OK(builder.Append(ptr, length)); - } - return Status::OK(); - } -}; - -// DictionaryHashHelper implementation for FixedSizeBinaryType -template -struct DictionaryHashHelper> { - using Builder = typename TypeTraits::BuilderType; - using Scalar = typename DictionaryScalar::type; - - static Scalar GetDictionaryValue(const Builder& builder, int64_t index) { - return builder.GetValue(index); - } - - static int64_t HashValue(const Scalar& value, int byte_width) { - return HashUtil::Hash(value, byte_width, 0); - } - - static bool SlotDifferent(const Builder& builder, int64_t index, const uint8_t* value) { - const int32_t width = builder.byte_width(); - const uint8_t* other_value = builder.GetValue(index); - return memcmp(value, other_value, width) != 0; - } - - static Status AppendValue(Builder& builder, const Scalar& value) { - return builder.Append(value); - } - - static Status AppendArray(Builder& builder, const Array& in_array) { - const auto& array = checked_cast(in_array); - for (uint64_t index = 0, limit = array.length(); index < limit; ++index) { - const Scalar value = array.GetValue(index); - RETURN_NOT_OK(builder.Append(value)); - } - return Status::OK(); - } -}; - -} // namespace +DictionaryBuilder::~DictionaryBuilder() {} template DictionaryBuilder::DictionaryBuilder(const std::shared_ptr& type, MemoryPool* pool) - : ArrayBuilder(type, pool), - hash_slots_(nullptr), - dict_builder_(type, pool), - overflow_dict_builder_(type, pool), - values_builder_(pool), - byte_width_(-1) {} + : ArrayBuilder(type, pool), byte_width_(-1), values_builder_(pool) { + DCHECK_EQ(T::type_id, type->id()) << "inconsistent type passed to DictionaryBuilder"; +} DictionaryBuilder::DictionaryBuilder(const std::shared_ptr& type, MemoryPool* pool) - : ArrayBuilder(type, pool), values_builder_(pool) {} + : ArrayBuilder(type, pool), values_builder_(pool) { + DCHECK_EQ(Type::NA, type->id()) << "inconsistent type passed to DictionaryBuilder"; +} template <> DictionaryBuilder::DictionaryBuilder( const std::shared_ptr& type, MemoryPool* pool) : ArrayBuilder(type, pool), - hash_slots_(nullptr), - dict_builder_(type, pool), - overflow_dict_builder_(type, pool), - values_builder_(pool), byte_width_(checked_cast(*type).byte_width()) {} template void DictionaryBuilder::Reset() { - dict_builder_.Reset(); - overflow_dict_builder_.Reset(); + ArrayBuilder::Reset(); values_builder_.Reset(); + memo_table_.reset(); + delta_offset_ = 0; } template @@ -916,14 +797,10 @@ Status DictionaryBuilder::Resize(int64_t capacity) { } if (capacity_ == 0) { - // Fill the initial hash table - RETURN_NOT_OK(internal::NewHashTable(kInitialHashTableSize, pool_, &hash_table_)); - hash_slots_ = reinterpret_cast(hash_table_->mutable_data()); - hash_table_size_ = kInitialHashTableSize; - entry_id_offset_ = 0; - mod_bitmask_ = kInitialHashTableSize - 1; - hash_table_load_threshold_ = - static_cast(static_cast(capacity) * kMaxHashTableLoad); + // Initialize hash table + // XXX should we let the user pass additional size heuristics? + memo_table_.reset(new MemoTableImpl(0)); + delta_offset_ = 0; } RETURN_NOT_OK(values_builder_.Resize(capacity)); return ArrayBuilder::Resize(capacity); @@ -937,67 +814,12 @@ Status DictionaryBuilder::Resize(int64_t capacity) { return ArrayBuilder::Resize(capacity); } -template -int64_t DictionaryBuilder::HashValue(const Scalar& value) { - return DictionaryHashHelper::HashValue(value, byte_width_); -} - -template -typename DictionaryBuilder::Scalar DictionaryBuilder::GetDictionaryValue( - typename TypeTraits::BuilderType& dictionary_builder, int64_t index) { - return DictionaryHashHelper::GetDictionaryValue(dictionary_builder, index); -} - -template -bool DictionaryBuilder::SlotDifferent(hash_slot_t index, const Scalar& value) { - DCHECK_GE(index, 0); - if (index >= entry_id_offset_) { - // Lookup delta dictionary - DCHECK_LT(index - entry_id_offset_, dict_builder_.length()); - return DictionaryHashHelper::SlotDifferent( - dict_builder_, static_cast(index - entry_id_offset_), value); - } else { - DCHECK_LT(index, overflow_dict_builder_.length()); - return DictionaryHashHelper::SlotDifferent(overflow_dict_builder_, - static_cast(index), value); - } -} - -template -Status DictionaryBuilder::AppendDictionary(const Scalar& value) { - return DictionaryHashHelper::AppendValue(dict_builder_, value); -} - template Status DictionaryBuilder::Append(const Scalar& value) { RETURN_NOT_OK(Reserve(1)); - // Based on DictEncoder::Put - int64_t j = HashValue(value) & mod_bitmask_; - hash_slot_t index = hash_slots_[j]; - - // Find an empty slot - while (kHashSlotEmpty != index && SlotDifferent(index, value)) { - // Linear probing - ++j; - if (j == hash_table_size_) { - j = 0; - } - index = hash_slots_[j]; - } - - if (index == kHashSlotEmpty) { - // Not in the hash table, so we insert it now - index = static_cast(dict_builder_.length() + entry_id_offset_); - hash_slots_[j] = index; - RETURN_NOT_OK(AppendDictionary(value)); - - if (ARROW_PREDICT_FALSE(static_cast(dict_builder_.length()) > - hash_table_load_threshold_)) { - RETURN_NOT_OK(DoubleTableSize()); - } - } - RETURN_NOT_OK(values_builder_.Append(index)); + auto memo_index = memo_table_->GetOrInsert(value); + RETURN_NOT_OK(values_builder_.Append(memo_index)); return Status::OK(); } @@ -1029,48 +851,24 @@ Status DictionaryBuilder::AppendArray(const Array& array) { return Status::OK(); } -template <> -Status DictionaryBuilder::AppendArray(const Array& array) { - if (!type_->Equals(*array.type())) { - return Status::Invalid("Cannot append FixedSizeBinary array with non-matching type"); - } - - const auto& numeric_array = checked_cast(array); - for (int64_t i = 0; i < array.length(); i++) { - if (array.IsNull(i)) { - RETURN_NOT_OK(AppendNull()); - } else { - RETURN_NOT_OK(Append(numeric_array.Value(i))); - } - } - return Status::OK(); -} - -template -Status DictionaryBuilder::DoubleTableSize() { -#define INNER_LOOP \ - int64_t j = HashValue(GetDictionaryValue(dict_builder_, index)) & new_mod_bitmask - - DOUBLE_TABLE_SIZE(, INNER_LOOP); - - return Status::OK(); -} - template Status DictionaryBuilder::FinishInternal(std::shared_ptr* out) { + // Finalize indices array + RETURN_NOT_OK(values_builder_.FinishInternal(out)); + + // Generate dictionary array from hash table contents std::shared_ptr dictionary; - entry_id_offset_ += dict_builder_.length(); - RETURN_NOT_OK(dict_builder_.Finish(&dictionary)); + std::shared_ptr dictionary_data; - // Store current dict entries for further uses of this DictionaryBuilder - RETURN_NOT_OK( - DictionaryHashHelper::AppendArray(overflow_dict_builder_, *dictionary)); - DCHECK_EQ(entry_id_offset_, overflow_dict_builder_.length()); + RETURN_NOT_OK(internal::DictionaryTraits::GetDictionaryArrayData( + pool_, type_, *memo_table_, delta_offset_, &dictionary_data)); + dictionary = MakeArray(dictionary_data); - RETURN_NOT_OK(values_builder_.FinishInternal(out)); + // Set type of array data to the right dictionary type (*out)->type = std::make_shared((*out)->type, dictionary); - dict_builder_.Reset(); + // Update internals for further uses of this DictionaryBuilder + delta_offset_ = memo_table_->size(); values_builder_.Reset(); return Status::OK(); @@ -1089,26 +887,42 @@ Status DictionaryBuilder::FinishInternal(std::shared_ptr* o // StringType and BinaryType specializations // -#define BINARY_DICTIONARY_SPECIALIZATIONS(Type) \ - \ - template <> \ - Status DictionaryBuilder::AppendArray(const Array& array) { \ - const BinaryArray& binary_array = checked_cast(array); \ - WrappedBinary value(nullptr, 0); \ - for (int64_t i = 0; i < array.length(); i++) { \ - if (array.IsNull(i)) { \ - RETURN_NOT_OK(AppendNull()); \ - } else { \ - value.ptr_ = binary_array.GetValue(i, &value.length_); \ - RETURN_NOT_OK(Append(value)); \ - } \ - } \ - return Status::OK(); \ +#define BINARY_DICTIONARY_SPECIALIZATIONS(Type) \ + \ + template <> \ + Status DictionaryBuilder::AppendArray(const Array& array) { \ + using ArrayType = typename TypeTraits::ArrayType; \ + const ArrayType& binary_array = checked_cast(array); \ + for (int64_t i = 0; i < array.length(); i++) { \ + if (array.IsNull(i)) { \ + RETURN_NOT_OK(AppendNull()); \ + } else { \ + RETURN_NOT_OK(Append(binary_array.GetView(i))); \ + } \ + } \ + return Status::OK(); \ } BINARY_DICTIONARY_SPECIALIZATIONS(StringType); BINARY_DICTIONARY_SPECIALIZATIONS(BinaryType); +template <> +Status DictionaryBuilder::AppendArray(const Array& array) { + if (!type_->Equals(*array.type())) { + return Status::Invalid("Cannot append FixedSizeBinary array with non-matching type"); + } + + const auto& typed_array = checked_cast(array); + for (int64_t i = 0; i < array.length(); i++) { + if (array.IsNull(i)) { + RETURN_NOT_OK(AppendNull()); + } else { + RETURN_NOT_OK(Append(typed_array.GetValue(i))); + } + } + return Status::OK(); +} + template class DictionaryBuilder; template class DictionaryBuilder; template class DictionaryBuilder; @@ -1316,6 +1130,19 @@ const uint8_t* BinaryBuilder::GetValue(int64_t i, int32_t* out_length) const { return value_data_builder_.data() + offset; } +util::string_view BinaryBuilder::GetView(int64_t i) const { + const int32_t* offsets = offsets_builder_.data(); + int32_t offset = offsets[i]; + int32_t value_length; + if (i == (length_ - 1)) { + value_length = static_cast(value_data_builder_.length()) - offset; + } else { + value_length = offsets[i + 1] - offset; + } + return util::string_view( + reinterpret_cast(value_data_builder_.data() + offset), value_length); +} + StringBuilder::StringBuilder(MemoryPool* pool) : BinaryBuilder(utf8(), pool) {} Status StringBuilder::AppendValues(const std::vector& values, @@ -1414,6 +1241,12 @@ FixedSizeBinaryBuilder::FixedSizeBinaryBuilder(const std::shared_ptr& byte_width_(checked_cast(*type).byte_width()), byte_builder_(pool) {} +#ifndef NDEBUG +void FixedSizeBinaryBuilder::CheckValueSize(int64_t size) { + DCHECK_EQ(size, byte_width_) << "Appending wrong size to FixedSizeBinaryBuilder"; +} +#endif + Status FixedSizeBinaryBuilder::AppendValues(const uint8_t* data, int64_t length, const uint8_t* valid_bytes) { RETURN_NOT_OK(Reserve(length)); @@ -1421,10 +1254,6 @@ Status FixedSizeBinaryBuilder::AppendValues(const uint8_t* data, int64_t length, return byte_builder_.Append(data, length * byte_width_); } -Status FixedSizeBinaryBuilder::Append(const std::string& value) { - return Append(reinterpret_cast(value.c_str())); -} - Status FixedSizeBinaryBuilder::AppendNull() { RETURN_NOT_OK(Reserve(1)); UnsafeAppendToBitmap(false); @@ -1457,6 +1286,12 @@ const uint8_t* FixedSizeBinaryBuilder::GetValue(int64_t i) const { return data_ptr + i * byte_width_; } +util::string_view FixedSizeBinaryBuilder::GetView(int64_t i) const { + const uint8_t* data_ptr = byte_builder_.data(); + return util::string_view(reinterpret_cast(data_ptr + i * byte_width_), + byte_width_); +} + // ---------------------------------------------------------------------- // Struct diff --git a/cpp/src/arrow/compute/kernels/hash.cc b/cpp/src/arrow/compute/kernels/hash.cc index 81801e2743b04..c057ea5736139 100644 --- a/cpp/src/arrow/compute/kernels/hash.cc +++ b/cpp/src/arrow/compute/kernels/hash.cc @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -39,32 +38,24 @@ #include "arrow/type_traits.h" #include "arrow/util/bit-util.h" #include "arrow/util/checked_cast.h" -#include "arrow/util/hash-util.h" -#include "arrow/util/hash.h" +#include "arrow/util/hashing.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" +#include "arrow/util/string_view.h" +#include "arrow/visitor_inline.h" namespace arrow { class MemoryPool; using internal::checked_cast; +using internal::DictionaryTraits; +using internal::HashTraits; namespace compute { -// TODO(wesm): Enable top-level dispatch to SSE4 hashing if it is enabled -#if defined(__GNUC__) && defined(__linux__) && defined(ARROW_USE_ARMCE) -#define HASH_MODE USE_ARMCRC -#elif defined(ARROW_USE_SSE) -#define HASH_MODE USE_SSE42 -#else -#define HASH_MODE USE_DEFAULT -#endif - namespace { -enum class SIMDMode : char { NOSIMD, SSE4, AVX2 }; - #define CHECK_IMPLEMENTED(KERNEL, FUNCNAME, TYPE) \ if (!KERNEL) { \ std::stringstream ss; \ @@ -72,754 +63,213 @@ enum class SIMDMode : char { NOSIMD, SSE4, AVX2 }; return Status::NotImplemented(ss.str()); \ } -// This is a slight design concession -- some hash actions have the possibility -// of failure. Rather than introduce extra error checking into all actions, we -// will raise an internal exception so that only the actions where errors can -// occur will experience the extra overhead -class HashException : public std::exception { - public: - explicit HashException(const std::string& msg, StatusCode code = StatusCode::Invalid) - : msg_(msg), code_(code) {} - - ~HashException() throw() override {} - - const char* what() const throw() override; - - StatusCode code() const { return code_; } - - private: - std::string msg_; - StatusCode code_; -}; - -const char* HashException::what() const throw() { return msg_.c_str(); } +// ---------------------------------------------------------------------- +// Unique implementation -class HashTable { +class UniqueAction { public: - HashTable(const std::shared_ptr& type, MemoryPool* pool) - : type_(type), - pool_(pool), - initialized_(false), - hash_table_(nullptr), - hash_slots_(nullptr), - hash_table_size_(0), - mod_bitmask_(0) {} + UniqueAction(const std::shared_ptr& type, MemoryPool* pool) {} - virtual ~HashTable() {} - - virtual Status Append(const ArrayData& input) = 0; - virtual Status Flush(Datum* out) = 0; - virtual Status GetDictionary(std::shared_ptr* out) = 0; - - protected: - Status Init(int64_t elements); + Status Reset() { return Status::OK(); } - std::shared_ptr type_; - MemoryPool* pool_; - bool initialized_; + Status Reserve(const int64_t length) { return Status::OK(); } - // The hash table contains integer indices that reference the set of observed - // distinct values - std::shared_ptr hash_table_; - hash_slot_t* hash_slots_; + void ObserveNull() {} - /// Size of the table. Must be a power of 2. - int64_t hash_table_size_; + template + void ObserveFound(Index index) {} - /// Size at which we decide to resize - int64_t hash_table_load_threshold_; + template + void ObserveNotFound(Index index) {} - // Store hash_table_size_ - 1, so that j & mod_bitmask_ is equivalent to j % - // hash_table_size_, but uses far fewer CPU cycles - int64_t mod_bitmask_; + Status Flush(Datum* out) { return Status::OK(); } }; -Status HashTable::Init(int64_t elements) { - DCHECK_EQ(elements, BitUtil::NextPower2(elements)); - RETURN_NOT_OK(internal::NewHashTable(elements, pool_, &hash_table_)); - hash_slots_ = reinterpret_cast(hash_table_->mutable_data()); - hash_table_size_ = elements; - hash_table_load_threshold_ = - static_cast(static_cast(elements) * kMaxHashTableLoad); - mod_bitmask_ = elements - 1; - initialized_ = true; - return Status::OK(); -} - -template -class HashTableKernel : public HashTable {}; - -// Types of hash actions -// -// unique: append to dictionary when not found, no-op with slot -// dictionary-encode: append to dictionary when not found, append slot # -// match: raise or set null when not found, otherwise append slot # -// isin: set false when not found, otherwise true -// value counts: append to dictionary when not found, increment count for slot - -template -class HashDictionary {}; - // ---------------------------------------------------------------------- -// Hash table pass for nulls +// Dictionary encode implementation -template -class HashTableKernel> : public HashTable { +class DictEncodeAction { public: - using HashTable::HashTable; - - Status Init() { - // No-op, do not even need to initialize hash table - return Status::OK(); - } + DictEncodeAction(const std::shared_ptr& type, MemoryPool* pool) + : indices_builder_(pool) {} - Status Append(const ArrayData& arr) override { - if (!initialized_) { - RETURN_NOT_OK(Init()); - } - auto action = checked_cast(this); - RETURN_NOT_OK(action->Reserve(arr.length)); - for (int64_t i = 0; i < arr.length; ++i) { - action->ObserveNull(); - } + Status Reset() { + indices_builder_.Reset(); return Status::OK(); } - Status GetDictionary(std::shared_ptr* out) override { - // TODO(wesm): handle null being a valid dictionary value - auto null_array = std::make_shared(0); - *out = null_array->data(); - return Status::OK(); - } -}; - -// ---------------------------------------------------------------------- -// Hash table pass for primitive types - -template -struct HashDictionary> { - using T = typename Type::c_type; + Status Reserve(const int64_t length) { return indices_builder_.Reserve(length); } - explicit HashDictionary(MemoryPool* pool) : pool(pool), size(0), capacity(0) {} + void ObserveNull() { indices_builder_.UnsafeAppendNull(); } - Status Init() { - this->size = 0; - RETURN_NOT_OK(AllocateResizableBuffer(this->pool, 0, &this->buffer)); - return Resize(kInitialHashTableSize); + template + void ObserveFound(Index index) { + indices_builder_.UnsafeAppend(index); } - Status DoubleSize() { return Resize(this->size * 2); } - - Status Resize(const int64_t elements) { - RETURN_NOT_OK(this->buffer->Resize(elements * sizeof(T))); + template + void ObserveNotFound(Index index) { + return ObserveFound(index); + } - this->capacity = elements; - this->values = reinterpret_cast(this->buffer->mutable_data()); + Status Flush(Datum* out) { + std::shared_ptr result; + RETURN_NOT_OK(indices_builder_.FinishInternal(&result)); + out->value = std::move(result); return Status::OK(); } - MemoryPool* pool; - std::shared_ptr buffer; - T* values; - int64_t size; - int64_t capacity; + private: + Int32Builder indices_builder_; }; -#define GENERIC_HASH_PASS(HASH_INNER_LOOP) \ - if (arr.null_count != 0) { \ - internal::BitmapReader valid_reader(arr.buffers[0]->data(), arr.offset, arr.length); \ - for (int64_t i = 0; i < arr.length; ++i) { \ - const bool is_null = valid_reader.IsNotSet(); \ - valid_reader.Next(); \ - \ - if (is_null) { \ - action->ObserveNull(); \ - continue; \ - } \ - \ - HASH_INNER_LOOP(); \ - } \ - } else { \ - for (int64_t i = 0; i < arr.length; ++i) { \ - HASH_INNER_LOOP(); \ - } \ - } +// ---------------------------------------------------------------------- +// Base class for all hash kernel implementations -template -class HashTableKernel< - Type, Action, - typename std::enable_if::value && !is_8bit_int::value>::type> - : public HashTable { +class HashKernelImpl : public HashKernel { public: - using T = typename Type::c_type; - - HashTableKernel(const std::shared_ptr& type, MemoryPool* pool) - : HashTable(type, pool), dict_(pool) {} - - Status Init() { - RETURN_NOT_OK(dict_.Init()); - return HashTable::Init(kInitialHashTableSize); - } - - Status Append(const ArrayData& arr) override { - if (!initialized_) { - RETURN_NOT_OK(Init()); - } - - const T* values = GetValues(arr, 1); - auto action = checked_cast(this); - - RETURN_NOT_OK(action->Reserve(arr.length)); - -#define HASH_INNER_LOOP() \ - const T value = values[i]; \ - int64_t j = HashValue(value) & mod_bitmask_; \ - hash_slot_t slot = hash_slots_[j]; \ - \ - while (kHashSlotEmpty != slot && dict_.values[slot] != value) { \ - ++j; \ - if (ARROW_PREDICT_FALSE(j == hash_table_size_)) { \ - j = 0; \ - } \ - slot = hash_slots_[j]; \ - } \ - \ - if (slot == kHashSlotEmpty) { \ - if (!Action::allow_expand) { \ - throw HashException("Encountered new dictionary value"); \ - } \ - \ - slot = static_cast(dict_.size); \ - hash_slots_[j] = slot; \ - dict_.values[dict_.size++] = value; \ - \ - action->ObserveNotFound(slot); \ - \ - if (ARROW_PREDICT_FALSE(dict_.size > hash_table_load_threshold_)) { \ - RETURN_NOT_OK(action->DoubleSize()); \ - } \ - } else { \ - action->ObserveFound(slot); \ + Status Call(FunctionContext* ctx, const Datum& input, Datum* out) override { + DCHECK_EQ(Datum::ARRAY, input.kind()); + RETURN_NOT_OK(Append(ctx, *input.array())); + return Flush(out); } - GENERIC_HASH_PASS(HASH_INNER_LOOP); - -#undef HASH_INNER_LOOP - - return Status::OK(); + Status Append(FunctionContext* ctx, const ArrayData& input) override { + std::lock_guard guard(lock_); + return Append(input); } - Status GetDictionary(std::shared_ptr* out) override { - // TODO(wesm): handle null being in the dictionary - auto dict_data = dict_.buffer; - RETURN_NOT_OK(dict_data->Resize(dict_.size * sizeof(T), false)); - dict_data->ZeroPadding(); - - *out = ArrayData::Make(type_, dict_.size, {nullptr, dict_data}, 0); - return Status::OK(); - } + virtual Status Append(const ArrayData& arr) = 0; protected: - int64_t HashValue(const T& value) const { - // TODO(wesm): Use faster hash function for C types - return HashUtil::Hash(&value, sizeof(T), 0); - } - - Status DoubleTableSize() { -#define PRIMITIVE_INNER_LOOP \ - const T value = dict_.values[index]; \ - int64_t j = HashValue(value) & new_mod_bitmask; - - DOUBLE_TABLE_SIZE(, PRIMITIVE_INNER_LOOP); - -#undef PRIMITIVE_INNER_LOOP - - return dict_.Resize(hash_table_size_); - } - - HashDictionary dict_; + std::mutex lock_; }; // ---------------------------------------------------------------------- -// Hash table for boolean types +// Base class for all "regular" hash kernel implementations +// (NullType has a separate implementation) -template -class HashTableKernel> : public HashTable { +template +class RegularHashKernelImpl : public HashKernelImpl { public: - HashTableKernel(const std::shared_ptr& type, MemoryPool* pool) - : HashTable(type, pool) { - std::fill(table_, table_ + 2, kHashSlotEmpty); + RegularHashKernelImpl(const std::shared_ptr& type, MemoryPool* pool) + : pool_(pool), type_(type), action_(type, pool) {} + + Status Reset() override { + memo_table_.reset(new MemoTable(0)); + return action_.Reset(); } Status Append(const ArrayData& arr) override { - auto action = checked_cast(this); - - RETURN_NOT_OK(action->Reserve(arr.length)); - - internal::BitmapReader value_reader(arr.buffers[1]->data(), arr.offset, arr.length); - -#define HASH_INNER_LOOP() \ - if (slot == kHashSlotEmpty) { \ - if (!Action::allow_expand) { \ - throw HashException("Encountered new dictionary value"); \ - } \ - table_[j] = slot = static_cast(dict_.size()); \ - dict_.push_back(value); \ - action->ObserveNotFound(slot); \ - } else { \ - action->ObserveFound(slot); \ + RETURN_NOT_OK(action_.Reserve(arr.length)); + return ArrayDataVisitor::Visit(arr, this); } - if (arr.null_count != 0) { - internal::BitmapReader valid_reader(arr.buffers[0]->data(), arr.offset, arr.length); - for (int64_t i = 0; i < arr.length; ++i) { - const bool is_null = valid_reader.IsNotSet(); - valid_reader.Next(); - if (is_null) { - value_reader.Next(); - action->ObserveNull(); - continue; - } - const bool value = value_reader.IsSet(); - value_reader.Next(); - const int j = value ? 1 : 0; - hash_slot_t slot = table_[j]; - HASH_INNER_LOOP(); - } - } else { - for (int64_t i = 0; i < arr.length; ++i) { - const bool value = value_reader.IsSet(); - value_reader.Next(); - const int j = value ? 1 : 0; - hash_slot_t slot = table_[j]; - HASH_INNER_LOOP(); - } - } - -#undef HASH_INNER_LOOP - - return Status::OK(); - } + Status Flush(Datum* out) override { return action_.Flush(out); } Status GetDictionary(std::shared_ptr* out) override { - BooleanBuilder builder(pool_); - for (const bool value : dict_) { - RETURN_NOT_OK(builder.Append(value)); - } - return builder.FinishInternal(out); + return DictionaryTraits::GetDictionaryArrayData(pool_, type_, *memo_table_, + 0 /* start_offset */, out); } - private: - hash_slot_t table_[2]; - std::vector dict_; -}; - -// ---------------------------------------------------------------------- -// Hash table pass for variable-length binary types - -template -class HashTableKernel> : public HashTable { - public: - HashTableKernel(const std::shared_ptr& type, MemoryPool* pool) - : HashTable(type, pool), dict_offsets_(pool), dict_data_(pool), dict_size_(0) {} - - Status Init() { - RETURN_NOT_OK(dict_offsets_.Resize(kInitialHashTableSize)); - - // We append the end offset after each append to the dictionary, so this - // sets the initial condition for the length-0 case - // - // initial offsets (dict size == 0): 0 - // after 1st dict entry of length 3: 0 3 - // after 2nd dict entry of length 4: 0 3 7 - RETURN_NOT_OK(dict_offsets_.Append(0)); - return HashTable::Init(kInitialHashTableSize); - } - - Status Append(const ArrayData& arr) override { - constexpr uint8_t empty_value = 0; - if (!initialized_) { - RETURN_NOT_OK(Init()); - } - - const int32_t* offsets = GetValues(arr, 1); - const uint8_t* data; - if (arr.buffers[2].get() == nullptr) { - data = &empty_value; - } else { - data = GetValues(arr, 2); - } - - auto action = checked_cast(this); - RETURN_NOT_OK(action->Reserve(arr.length)); - -#define HASH_INNER_LOOP() \ - const int32_t position = offsets[i]; \ - const int32_t length = offsets[i + 1] - position; \ - const uint8_t* value = data + position; \ - \ - int64_t j = HashValue(value, length) & mod_bitmask_; \ - hash_slot_t slot = hash_slots_[j]; \ - \ - const int32_t* dict_offsets = dict_offsets_.data(); \ - const uint8_t* dict_data = dict_data_.data(); \ - while (kHashSlotEmpty != slot && \ - !((dict_offsets[slot + 1] - dict_offsets[slot]) == length && \ - 0 == memcmp(value, dict_data + dict_offsets[slot], length))) { \ - ++j; \ - if (ARROW_PREDICT_FALSE(j == hash_table_size_)) { \ - j = 0; \ - } \ - slot = hash_slots_[j]; \ - } \ - \ - if (slot == kHashSlotEmpty) { \ - if (!Action::allow_expand) { \ - throw HashException("Encountered new dictionary value"); \ - } \ - \ - slot = dict_size_++; \ - hash_slots_[j] = slot; \ - \ - RETURN_NOT_OK(dict_data_.Append(value, length)); \ - RETURN_NOT_OK(dict_offsets_.Append(static_cast(dict_data_.length()))); \ - \ - action->ObserveNotFound(slot); \ - \ - if (ARROW_PREDICT_FALSE(dict_size_ > hash_table_load_threshold_)) { \ - RETURN_NOT_OK(action->DoubleSize()); \ - } \ - } else { \ - action->ObserveFound(slot); \ - } - - GENERIC_HASH_PASS(HASH_INNER_LOOP); - -#undef HASH_INNER_LOOP - + Status VisitNull() { + action_.ObserveNull(); return Status::OK(); } - Status GetDictionary(std::shared_ptr* out) override { - // TODO(wesm): handle null being in the dictionary - BufferVector buffers = {nullptr, nullptr, nullptr}; - - RETURN_NOT_OK(dict_offsets_.Finish(&buffers[1])); - RETURN_NOT_OK(dict_data_.Finish(&buffers[2])); - - *out = ArrayData::Make(type_, dict_size_, std::move(buffers), 0); + Status VisitValue(const Scalar& value) { + auto on_found = [this](int32_t memo_index) { action_.ObserveFound(memo_index); }; + auto on_not_found = [this](int32_t memo_index) { + action_.ObserveNotFound(memo_index); + }; + memo_table_->GetOrInsert(value, on_found, on_not_found); return Status::OK(); } protected: - int64_t HashValue(const uint8_t* data, int32_t length) const { - return HashUtil::Hash(data, length, 0); - } - - Status DoubleTableSize() { -#define VARBYTES_SETUP \ - const int32_t* dict_offsets = dict_offsets_.data(); \ - const uint8_t* dict_data = dict_data_.data() - -#define VARBYTES_COMPUTE_HASH \ - const int32_t length = dict_offsets[index + 1] - dict_offsets[index]; \ - const uint8_t* value = dict_data + dict_offsets[index]; \ - int64_t j = HashValue(value, length) & new_mod_bitmask - - DOUBLE_TABLE_SIZE(VARBYTES_SETUP, VARBYTES_COMPUTE_HASH); + using MemoTable = typename HashTraits::MemoTableType; -#undef VARBYTES_SETUP -#undef VARBYTES_COMPUTE_HASH - - return Status::OK(); - } - - TypedBufferBuilder dict_offsets_; - TypedBufferBuilder dict_data_; - int32_t dict_size_; + MemoryPool* pool_; + std::shared_ptr type_; + Action action_; + std::unique_ptr memo_table_; }; // ---------------------------------------------------------------------- -// Hash table pass for fixed size binary types +// Hash kernel implementation for nulls -template -class HashTableKernel> - : public HashTable { +template +class NullHashKernelImpl : public HashKernelImpl { public: - HashTableKernel(const std::shared_ptr& type, MemoryPool* pool) - : HashTable(type, pool), dict_data_(pool), dict_size_(0) { - const auto& fw_type = checked_cast(*type); - byte_width_ = fw_type.bit_width() / 8; - } + NullHashKernelImpl(const std::shared_ptr& type, MemoryPool* pool) + : pool_(pool), type_(type), action_(type, pool) {} - Status Init() { - RETURN_NOT_OK(dict_data_.Resize(kInitialHashTableSize * byte_width_)); - return HashTable::Init(kInitialHashTableSize); - } + Status Reset() override { return action_.Reset(); } Status Append(const ArrayData& arr) override { - if (!initialized_) { - RETURN_NOT_OK(Init()); + RETURN_NOT_OK(action_.Reserve(arr.length)); + for (int64_t i = 0; i < arr.length; ++i) { + action_.ObserveNull(); } - - const uint8_t* data = GetValues(arr, 1); - - auto action = checked_cast(this); - RETURN_NOT_OK(action->Reserve(arr.length)); - -#define HASH_INNER_LOOP() \ - const uint8_t* value = data + i * byte_width_; \ - int64_t j = HashValue(value) & mod_bitmask_; \ - hash_slot_t slot = hash_slots_[j]; \ - \ - const uint8_t* dict_data = dict_data_.data(); \ - while (kHashSlotEmpty != slot && \ - !(0 == memcmp(value, dict_data + slot * byte_width_, byte_width_))) { \ - ++j; \ - if (ARROW_PREDICT_FALSE(j == hash_table_size_)) { \ - j = 0; \ - } \ - slot = hash_slots_[j]; \ - } \ - \ - if (slot == kHashSlotEmpty) { \ - if (!Action::allow_expand) { \ - throw HashException("Encountered new dictionary value"); \ - } \ - \ - slot = dict_size_++; \ - hash_slots_[j] = slot; \ - \ - RETURN_NOT_OK(dict_data_.Append(value, byte_width_)); \ - \ - action->ObserveNotFound(slot); \ - \ - if (ARROW_PREDICT_FALSE(dict_size_ > hash_table_load_threshold_)) { \ - RETURN_NOT_OK(action->DoubleSize()); \ - } \ - } else { \ - action->ObserveFound(slot); \ - } - - GENERIC_HASH_PASS(HASH_INNER_LOOP); - -#undef HASH_INNER_LOOP - return Status::OK(); } - Status GetDictionary(std::shared_ptr* out) override { - // TODO(wesm): handle null being in the dictionary - BufferVector buffers = {nullptr, nullptr}; - RETURN_NOT_OK(dict_data_.Finish(&buffers[1])); + Status Flush(Datum* out) override { return action_.Flush(out); } - *out = ArrayData::Make(type_, dict_size_, std::move(buffers), 0); + Status GetDictionary(std::shared_ptr* out) override { + // TODO(wesm): handle null being a valid dictionary value + auto null_array = std::make_shared(0); + *out = null_array->data(); return Status::OK(); } protected: - int64_t HashValue(const uint8_t* data) const { - return HashUtil::Hash(data, byte_width_, 0); - } - - Status DoubleTableSize() { -#define FIXED_BYTES_SETUP const uint8_t* dict_data = dict_data_.data() - -#define FIXED_BYTES_COMPUTE_HASH \ - int64_t j = HashValue(dict_data + index * byte_width_) & new_mod_bitmask - - DOUBLE_TABLE_SIZE(FIXED_BYTES_SETUP, FIXED_BYTES_COMPUTE_HASH); - -#undef FIXED_BYTES_SETUP -#undef FIXED_BYTES_COMPUTE_HASH - - return Status::OK(); - } - - int32_t byte_width_; - TypedBufferBuilder dict_data_; - int32_t dict_size_; + MemoryPool* pool_; + std::shared_ptr type_; + Action action_; }; // ---------------------------------------------------------------------- -// Hash table pass for uint8 and int8 - -template -inline int Hash8Bit(const T val) { - return 0; -} - -template <> -inline int Hash8Bit(const uint8_t val) { - return val; -} +// Kernel wrapper for generic hash table kernels -template <> -inline int Hash8Bit(const int8_t val) { - return val + 128; -} +template +struct HashKernelTraits {}; template -class HashTableKernel> : public HashTable { - public: - using T = typename Type::c_type; - - HashTableKernel(const std::shared_ptr& type, MemoryPool* pool) - : HashTable(type, pool) { - std::fill(table_, table_ + 256, kHashSlotEmpty); - } - - Status Append(const ArrayData& arr) override { - const T* values = GetValues(arr, 1); - auto action = checked_cast(this); - RETURN_NOT_OK(action->Reserve(arr.length)); - -#define HASH_INNER_LOOP() \ - const T value = values[i]; \ - const int hash = Hash8Bit(value); \ - hash_slot_t slot = table_[hash]; \ - \ - if (slot == kHashSlotEmpty) { \ - if (!Action::allow_expand) { \ - throw HashException("Encountered new dictionary value"); \ - } \ - \ - slot = static_cast(dict_.size()); \ - table_[hash] = slot; \ - dict_.push_back(value); \ - action->ObserveNotFound(slot); \ - } else { \ - action->ObserveFound(slot); \ - } - - GENERIC_HASH_PASS(HASH_INNER_LOOP); - -#undef HASH_INNER_LOOP - - return Status::OK(); - } - - Status GetDictionary(std::shared_ptr* out) override { - using BuilderType = typename TypeTraits::BuilderType; - BuilderType builder(pool_); - - for (const T value : dict_) { - RETURN_NOT_OK(builder.Append(value)); - } - - return builder.FinishInternal(out); - } - - private: - hash_slot_t table_[256]; - std::vector dict_; +struct HashKernelTraits> { + using HashKernelImpl = NullHashKernelImpl; }; -// ---------------------------------------------------------------------- -// Unique implementation - -template -class UniqueImpl : public HashTableKernel> { - public: - static constexpr bool allow_expand = true; - using Base = HashTableKernel>; - using Base::Base; - - Status Reserve(const int64_t length) { return Status::OK(); } - - void ObserveFound(const hash_slot_t slot) {} - void ObserveNull() {} - void ObserveNotFound(const hash_slot_t slot) {} - - Status DoubleSize() { return Base::DoubleTableSize(); } - - Status Append(const ArrayData& input) override { return Base::Append(input); } - - Status Flush(Datum* out) override { - // No-op - return Status::OK(); - } +template +struct HashKernelTraits> { + using HashKernelImpl = RegularHashKernelImpl; }; -// ---------------------------------------------------------------------- -// Dictionary encode implementation - -template -class DictEncodeImpl : public HashTableKernel> { - public: - static constexpr bool allow_expand = true; - using Base = HashTableKernel; - - DictEncodeImpl(const std::shared_ptr& type, MemoryPool* pool) - : Base(type, pool), indices_builder_(pool) {} - - Status Reserve(const int64_t length) { return indices_builder_.Reserve(length); } - - void ObserveNull() { indices_builder_.UnsafeAppendToBitmap(false); } - - void ObserveFound(const hash_slot_t slot) { indices_builder_.UnsafeAppend(slot); } - - void ObserveNotFound(const hash_slot_t slot) { return ObserveFound(slot); } - - Status DoubleSize() { return Base::DoubleTableSize(); } - - Status Flush(Datum* out) override { - std::shared_ptr result; - RETURN_NOT_OK(indices_builder_.FinishInternal(&result)); - out->value = std::move(result); - return Status::OK(); - } - - using Base::Append; - - private: - Int32Builder indices_builder_; +template +struct HashKernelTraits> { + using HashKernelImpl = RegularHashKernelImpl; }; -// ---------------------------------------------------------------------- -// Kernel wrapper for generic hash table kernels - -class HashKernelImpl : public HashKernel { - public: - explicit HashKernelImpl(std::unique_ptr hasher) - : hasher_(std::move(hasher)) {} - - Status Call(FunctionContext* ctx, const Datum& input, Datum* out) override { - DCHECK_EQ(Datum::ARRAY, input.kind()); - RETURN_NOT_OK(Append(ctx, *input.array())); - return Flush(out); - } - - Status Append(FunctionContext* ctx, const ArrayData& input) override { - std::lock_guard guard(lock_); - try { - RETURN_NOT_OK(hasher_->Append(input)); - } catch (const HashException& e) { - return Status(e.code(), e.what()); - } - return Status::OK(); - } - - Status Flush(Datum* out) override { return hasher_->Flush(out); } - - Status GetDictionary(std::shared_ptr* out) override { - return hasher_->GetDictionary(out); - } +template +struct HashKernelTraits> { + using HashKernelImpl = RegularHashKernelImpl; +}; - private: - std::mutex lock_; - std::unique_ptr hasher_; +template +struct HashKernelTraits> { + using HashKernelImpl = RegularHashKernelImpl; }; } // namespace Status GetUniqueKernel(FunctionContext* ctx, const std::shared_ptr& type, std::unique_ptr* out) { - std::unique_ptr hasher; + std::unique_ptr kernel; -#define UNIQUE_CASE(InType) \ - case InType::type_id: \ - hasher.reset(new UniqueImpl(type, ctx->memory_pool())); \ +#define UNIQUE_CASE(InType) \ + case InType::type_id: \ + kernel.reset(new typename HashKernelTraits::HashKernelImpl( \ + type, ctx->memory_pool())); \ break switch (type->id()) { @@ -850,19 +300,22 @@ Status GetUniqueKernel(FunctionContext* ctx, const std::shared_ptr& ty #undef UNIQUE_CASE - CHECK_IMPLEMENTED(hasher, "unique", type); - out->reset(new HashKernelImpl(std::move(hasher))); + CHECK_IMPLEMENTED(kernel, "unique", type); + RETURN_NOT_OK(kernel->Reset()); + *out = std::move(kernel); return Status::OK(); } Status GetDictionaryEncodeKernel(FunctionContext* ctx, const std::shared_ptr& type, std::unique_ptr* out) { - std::unique_ptr hasher; + std::unique_ptr kernel; -#define DICTIONARY_ENCODE_CASE(InType) \ - case InType::type_id: \ - hasher.reset(new DictEncodeImpl(type, ctx->memory_pool())); \ +#define DICTIONARY_ENCODE_CASE(InType) \ + case InType::type_id: \ + kernel.reset(new \ + typename HashKernelTraits::HashKernelImpl( \ + type, ctx->memory_pool())); \ break switch (type->id()) { @@ -893,8 +346,9 @@ Status GetDictionaryEncodeKernel(FunctionContext* ctx, #undef DICTIONARY_ENCODE_CASE - CHECK_IMPLEMENTED(hasher, "dictionary-encode", type); - out->reset(new HashKernelImpl(std::move(hasher))); + CHECK_IMPLEMENTED(kernel, "dictionary-encode", type); + RETURN_NOT_OK(kernel->Reset()); + *out = std::move(kernel); return Status::OK(); } diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index 89229169ecdb7..c43298d011efe 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -60,9 +60,6 @@ install(FILES windows_compatibility.h DESTINATION include/arrow/util) -# Armv8 CRC support -configure_file(config.in.cmake ${CMAKE_CURRENT_SOURCE_DIR}/my_config.h) - ####################################### # arrow_test_main ####################################### diff --git a/cpp/src/arrow/util/armce-util.h b/cpp/src/arrow/util/armce-util.h new file mode 100644 index 0000000000000..8c26c63197bfd --- /dev/null +++ b/cpp/src/arrow/util/armce-util.h @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef ARROW_UTIL_ARMCE_UTIL_H +#define ARROW_UTIL_ARMCE_UTIL_H + +namespace arrow { + +#if defined(__aarch64__) || defined(__AARCH64__) +#ifdef __ARM_FEATURE_CRC32 +#define ARROW_HAVE_ARM_CRC +#include +#endif +#endif + +#if defined(__GNUC__) && defined(__linux__) && defined(ARROW_HAVE_ARM_CRC) + +#include +#include +#ifndef HWCAP_CRC32 +#define HWCAP_CRC32 (1 << 7) +#endif +static inline uint32_t crc32c_runtime_check(void) +{ + unsigned long auxv = getauxval(AT_HWCAP); + return (auxv & HWCAP_CRC32) != 0; +} + +static inline uint32_t ARMCE_crc32_u8(uint32_t crc, uint8_t v) { + return __crc32cb(crc, v); +} + +static inline uint32_t ARMCE_crc32_u16(uint32_t crc, uint16_t v) { + return __crc32ch(crc, v); +} + +static inline uint32_t ARMCE_crc32_u32(uint32_t crc, uint32_t v) { + return __crc32cw(crc, v); +} + +static inline uint32_t ARMCE_crc32_u64(uint32_t crc, uint64_t v) { + return __crc32cd(crc, v); +} + +#else + +static inline uint32_t crc32c_runtime_check(void) { + DCHECK(false) << "Arm crc32 support is not enabled"; + return 0; +} + +static inline uint32_t ARMCE_crc32_u8(uint32_t, uint8_t) { + DCHECK(false) << "Arm crc32 support is not enabled"; + return 0; +} + +static inline uint32_t ARMCE_crc32_u16(uint32_t, uint16_t) { + DCHECK(false) << "Arm crc32 is not enabled"; + return 0; +} + +static inline uint32_t ARMCE_crc32_u32(uint32_t, uint32_t) { + DCHECK(false) << "Arm crc32 support is not enabled"; + return 0; +} + +static inline uint32_t ARMCE_crc32_u64(uint32_t, uint64_t) { + DCHECK(false) << "Arm crc32 support is not enabled"; + return 0; +} + +#endif // defined(__GNUC__) && defined(__linux__) && defined(ARROW_HAVE_ARM_CRC) + +} // namespace arrow + +#endif // ARROW_UTIL_ARMCE_UTIL_H diff --git a/cpp/src/arrow/util/hash-util.h b/cpp/src/arrow/util/hash-util.h index 3f7e4048bdf10..fccb8ca733ab0 100644 --- a/cpp/src/arrow/util/hash-util.h +++ b/cpp/src/arrow/util/hash-util.h @@ -26,21 +26,54 @@ #include "arrow/util/logging.h" #include "arrow/util/macros.h" #include "arrow/util/sse-util.h" +#include "arrow/util/armce-util.h" + +static inline uint32_t HW_crc32_u8(uint32_t crc, uint8_t v) { + DCHECK(false) << "Hardware CRC support is not enabled"; + return 0; +} + +static inline uint32_t HW_crc32_u16(uint32_t crc, uint16_t v) { + DCHECK(false) << "Hardware CRC support is not enabled"; + return 0; +} + +static inline uint32_t HW_crc32_u32(uint32_t crc, uint32_t v) { + DCHECK(false) << "Hardware CRC support is not enabled"; + return 0; +} + +static inline uint32_t HW_crc32_u64(uint32_t crc, uint64_t v) { + DCHECK(false) << "Hardware CRC support is not enabled"; + return 0; +} + +#ifdef ARROW_HAVE_SSE4_2 +#define HW_crc32_u8 SSE4_crc32_u8 +#define HW_crc32_u16 SSE4_crc32_u16 +#define HW_crc32_u32 SSE4_crc32_u32 +#define HW_crc32_u64 SSE4_crc32_u64 +#elif defined(ARROW_HAVE_ARM_CRC) +#define HW_crc32_u8 ARMCE_crc32_u8 +#define HW_crc32_u16 ARMCE_crc32_u16 +#define HW_crc32_u32 ARMCE_crc32_u32 +#define HW_crc32_u64 ARMCE_crc32_u64 +#endif namespace arrow { /// Utility class to compute hash values. class HashUtil { public: -#ifdef ARROW_HAVE_SSE4_2 +#if defined(ARROW_HAVE_SSE4_2) || defined(ARROW_HAVE_ARM_CRC) static constexpr bool have_hardware_crc32 = true; #else static constexpr bool have_hardware_crc32 = false; #endif - /// Compute the Crc32 hash for data using SSE4 instructions. The input hash + /// Compute the Crc32 hash for data using SSE4/ArmCRC instructions. The input hash /// parameter is the current hash/seed value. - /// This should only be called if SSE is supported. + /// This should only be called if SSE/ArmCRC is supported. /// This is ~4x faster than Fnv/Boost Hash. /// TODO: crc32 hashes with different seeds do not result in different hash functions. /// The resulting hashes are correlated. @@ -49,15 +82,15 @@ class HashUtil { const uint8_t* end = p + nbytes; while (p <= end - 8) { - hash = SSE4_crc32_u64(hash, *reinterpret_cast(p)); + hash = HW_crc32_u64(hash, *reinterpret_cast(p)); p += 8; } while (p <= end - 4) { - hash = SSE4_crc32_u32(hash, *reinterpret_cast(p)); + hash = HW_crc32_u32(hash, *reinterpret_cast(p)); p += 4; } while (p < end) { - hash = SSE4_crc32_u8(hash, *p); + hash = HW_crc32_u8(hash, *p); ++p; } @@ -81,30 +114,30 @@ class HashUtil { uint32_t h2 = static_cast(hash); while (nbytes >= 16) { - h1 = SSE4_crc32_u64(h1, *reinterpret_cast(p)); - h2 = SSE4_crc32_u64(h2, *reinterpret_cast(p + 8)); + h1 = HW_crc32_u64(h1, *reinterpret_cast(p)); + h2 = HW_crc32_u64(h2, *reinterpret_cast(p + 8)); nbytes -= 16; p += 16; } if (nbytes >= 8) { - h1 = SSE4_crc32_u32(h1, *reinterpret_cast(p)); - h2 = SSE4_crc32_u32(h2, *reinterpret_cast(p + 4)); + h1 = HW_crc32_u32(h1, *reinterpret_cast(p)); + h2 = HW_crc32_u32(h2, *reinterpret_cast(p + 4)); nbytes -= 8; p += 8; } if (nbytes >= 4) { - h1 = SSE4_crc32_u16(h1, *reinterpret_cast(p)); - h2 = SSE4_crc32_u16(h2, *reinterpret_cast(p + 2)); + h1 = HW_crc32_u16(h1, *reinterpret_cast(p)); + h2 = HW_crc32_u16(h2, *reinterpret_cast(p + 2)); nbytes -= 4; p += 4; } switch (nbytes) { case 3: - h1 = SSE4_crc32_u8(h1, p[3]); + h1 = HW_crc32_u8(h1, p[3]); case 2: - h2 = SSE4_crc32_u8(h2, p[2]); + h2 = HW_crc32_u8(h2, p[2]); case 1: - h1 = SSE4_crc32_u8(h1, p[1]); + h1 = HW_crc32_u8(h1, p[1]); case 0: break; default: @@ -118,7 +151,7 @@ class HashUtil { /// CrcHash() specialized for 1-byte data static inline uint32_t CrcHash1(const void* v, uint32_t hash) { const uint8_t* s = reinterpret_cast(v); - hash = SSE4_crc32_u8(hash, *s); + hash = HW_crc32_u8(hash, *s); hash = (hash << 16) | (hash >> 16); return hash; } @@ -126,7 +159,7 @@ class HashUtil { /// CrcHash() specialized for 2-byte data static inline uint32_t CrcHash2(const void* v, uint32_t hash) { const uint16_t* s = reinterpret_cast(v); - hash = SSE4_crc32_u16(hash, *s); + hash = HW_crc32_u16(hash, *s); hash = (hash << 16) | (hash >> 16); return hash; } @@ -134,7 +167,7 @@ class HashUtil { /// CrcHash() specialized for 4-byte data static inline uint32_t CrcHash4(const void* v, uint32_t hash) { const uint32_t* p = reinterpret_cast(v); - hash = SSE4_crc32_u32(hash, *p); + hash = HW_crc32_u32(hash, *p); hash = (hash << 16) | (hash >> 16); return hash; } @@ -142,7 +175,7 @@ class HashUtil { /// CrcHash() specialized for 8-byte data static inline uint32_t CrcHash8(const void* v, uint32_t hash) { const uint64_t* p = reinterpret_cast(v); - hash = SSE4_crc32_u64(hash, *p); + hash = HW_crc32_u64(hash, *p); hash = (hash << 16) | (hash >> 16); return hash; } @@ -150,9 +183,9 @@ class HashUtil { /// CrcHash() specialized for 12-byte data static inline uint32_t CrcHash12(const void* v, uint32_t hash) { const uint64_t* p = reinterpret_cast(v); - hash = SSE4_crc32_u64(hash, *p); + hash = HW_crc32_u64(hash, *p); ++p; - hash = SSE4_crc32_u32(hash, *reinterpret_cast(p)); + hash = HW_crc32_u32(hash, *reinterpret_cast(p)); hash = (hash << 16) | (hash >> 16); return hash; } @@ -160,9 +193,9 @@ class HashUtil { /// CrcHash() specialized for 16-byte data static inline uint32_t CrcHash16(const void* v, uint32_t hash) { const uint64_t* p = reinterpret_cast(v); - hash = SSE4_crc32_u64(hash, *p); + hash = HW_crc32_u64(hash, *p); ++p; - hash = SSE4_crc32_u64(hash, *p); + hash = HW_crc32_u64(hash, *p); hash = (hash << 16) | (hash >> 16); return hash; } @@ -251,8 +284,8 @@ class HashUtil { return static_cast((hash_u64 >> 32) ^ (hash_u64 & 0xFFFFFFFF)); } - // With sse4.2 - template + // Hash template + template static inline int Hash(const void* data, int32_t bytes, uint32_t seed); /// The magic number (used in hash_combine()) 0x9e3779b9 = 2^32 / (golden ratio). @@ -288,13 +321,21 @@ class HashUtil { } }; -// With sse4.2 +// HW Hash template <> inline int HashUtil::Hash(const void* data, int32_t bytes, uint32_t seed) { - return static_cast(HashUtil::CrcHash(data, bytes, seed)); +#ifdef ARROW_HAVE_ARM_CRC + // Need run time check for Arm + // if not support, fall back to Murmur + if (!crc32c_runtime_check()) + return static_cast(HashUtil::MurmurHash2_64(data, bytes, seed)); + else +#endif + // Double CRC + return static_cast(HashUtil::DoubleCrcHash(data, bytes, seed)); } -// Non-sse4 hash +// Murmur Hash template <> inline int HashUtil::Hash(const void* data, int32_t bytes, uint32_t seed) { return static_cast(HashUtil::MurmurHash2_64(data, bytes, seed)); From 6b99d208335f93d36707a1c3dd7177b493574a57 Mon Sep 17 00:00:00 2001 From: Yuqi Gu Date: Thu, 29 Nov 2018 02:52:39 +0000 Subject: [PATCH 3/3] Fix the coding style Change-Id: I02e8a52935376b4ecc700cd17edaeb871b2bd487 --- cpp/src/arrow/util/CMakeLists.txt | 2 +- cpp/src/arrow/util/hash-util.h | 6 +++--- cpp/src/arrow/util/{armce-util.h => neon-util.h} | 13 ++++++------- 3 files changed, 10 insertions(+), 11 deletions(-) rename cpp/src/arrow/util/{armce-util.h => neon-util.h} (92%) diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index c43298d011efe..d785eeeaae99e 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -21,7 +21,6 @@ # Headers: top level install(FILES - armce-util.h bit-stream-utils.h bit-util.h bpacking.h @@ -45,6 +44,7 @@ install(FILES logging.h macros.h memory.h + neon-util.h parallel.h rle-encoding.h sse-util.h diff --git a/cpp/src/arrow/util/hash-util.h b/cpp/src/arrow/util/hash-util.h index fccb8ca733ab0..fd69cb9438c12 100644 --- a/cpp/src/arrow/util/hash-util.h +++ b/cpp/src/arrow/util/hash-util.h @@ -25,8 +25,8 @@ #include "arrow/util/logging.h" #include "arrow/util/macros.h" +#include "arrow/util/neon-util.h" #include "arrow/util/sse-util.h" -#include "arrow/util/armce-util.h" static inline uint32_t HW_crc32_u8(uint32_t crc, uint8_t v) { DCHECK(false) << "Hardware CRC support is not enabled"; @@ -49,12 +49,12 @@ static inline uint32_t HW_crc32_u64(uint32_t crc, uint64_t v) { } #ifdef ARROW_HAVE_SSE4_2 -#define HW_crc32_u8 SSE4_crc32_u8 +#define HW_crc32_u8 SSE4_crc32_u8 #define HW_crc32_u16 SSE4_crc32_u16 #define HW_crc32_u32 SSE4_crc32_u32 #define HW_crc32_u64 SSE4_crc32_u64 #elif defined(ARROW_HAVE_ARM_CRC) -#define HW_crc32_u8 ARMCE_crc32_u8 +#define HW_crc32_u8 ARMCE_crc32_u8 #define HW_crc32_u16 ARMCE_crc32_u16 #define HW_crc32_u32 ARMCE_crc32_u32 #define HW_crc32_u64 ARMCE_crc32_u64 diff --git a/cpp/src/arrow/util/armce-util.h b/cpp/src/arrow/util/neon-util.h similarity index 92% rename from cpp/src/arrow/util/armce-util.h rename to cpp/src/arrow/util/neon-util.h index 8c26c63197bfd..c81bf1440c37c 100644 --- a/cpp/src/arrow/util/armce-util.h +++ b/cpp/src/arrow/util/neon-util.h @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -#ifndef ARROW_UTIL_ARMCE_UTIL_H -#define ARROW_UTIL_ARMCE_UTIL_H +#ifndef ARROW_UTIL_NEON_UTIL_H +#define ARROW_UTIL_NEON_UTIL_H namespace arrow { @@ -29,14 +29,13 @@ namespace arrow { #if defined(__GNUC__) && defined(__linux__) && defined(ARROW_HAVE_ARM_CRC) -#include #include +#include #ifndef HWCAP_CRC32 #define HWCAP_CRC32 (1 << 7) #endif -static inline uint32_t crc32c_runtime_check(void) -{ - unsigned long auxv = getauxval(AT_HWCAP); +static inline uint32_t crc32c_runtime_check(void) { + uint64_t auxv = getauxval(AT_HWCAP); return (auxv & HWCAP_CRC32) != 0; } @@ -87,4 +86,4 @@ static inline uint32_t ARMCE_crc32_u64(uint32_t, uint64_t) { } // namespace arrow -#endif // ARROW_UTIL_ARMCE_UTIL_H +#endif // ARROW_UTIL_NEON_UTIL_H