diff --git a/velox/experimental/codegen/CMakeLists.txt b/velox/experimental/codegen/CMakeLists.txt index 353815c16edd..1eaf2f763d3d 100644 --- a/velox/experimental/codegen/CMakeLists.txt +++ b/velox/experimental/codegen/CMakeLists.txt @@ -26,6 +26,9 @@ add_subdirectory(code_generator) add_subdirectory(ast) add_subdirectory(udf_manager) add_subdirectory(utils) +add_subdirectory(benchmark) +add_subdirectory(functions) +add_subdirectory(vector_function) add_library(velox_experimental_codegen Codegen.cpp CodegenStubs.cpp CodegenLogger.cpp) diff --git a/velox/experimental/codegen/benchmark/CMakeLists.txt b/velox/experimental/codegen/benchmark/CMakeLists.txt index ba65ab64bceb..5bff88931394 100644 --- a/velox/experimental/codegen/benchmark/CMakeLists.txt +++ b/velox/experimental/codegen/benchmark/CMakeLists.txt @@ -13,11 +13,12 @@ add_executable( velox_codegen_benchmark_single_output SingleOutputDefaultNullsBenchmark.cpp - SingleOutputNotDefaultNullsBenchmark.cpp BooleanBenchmark.cpp) + SingleOutputNotDefaultNullsBenchmark.cpp BooleanBenchmarks.cpp) target_link_libraries( velox_codegen_benchmark_single_output velox_exec_test_lib + velox_codegen_utils_resource_path velox_codegen_code_generator ${FOLLY_BENCHMARK} ${FOLLY_WITH_DEPENDENCIES} @@ -26,7 +27,6 @@ target_link_libraries( ${GFLAGS_LIBRARIES} ${FMT} velox_core - koski_parser velox_exec_test_util velox_functions_lib velox_functions_common @@ -46,8 +46,7 @@ add_compile_definitions(velox_codegen_test add_compile_definitions( velox_codegen_test CODEGEN="$") -add_compile_definitions(velox_codegen_test - KOSKI_PARSER="$") + add_compile_definitions( velox_codegen_test VELOX_FUNCTIONS_LIB="$") @@ -89,7 +88,7 @@ target_link_libraries( ${GLOG} ${FMT} velox_core - koski_parser + velox_codegen_utils_resource_path velox_exec_test_util velox_functions_lib velox_functions_common diff --git a/velox/experimental/codegen/code_generator/tests/ArithmeticFunctionsTest.cpp b/velox/experimental/codegen/code_generator/tests/ArithmeticFunctionsTest.cpp index d14a9e4626bf..b0c0e74e5570 100644 --- a/velox/experimental/codegen/code_generator/tests/ArithmeticFunctionsTest.cpp +++ b/velox/experimental/codegen/code_generator/tests/ArithmeticFunctionsTest.cpp @@ -466,7 +466,11 @@ TEST_F(ArithmeticFunctionsTest, DISABLED_testRound) { "round(C0)", {{32}, {13}, {-13}, {1}, {-1}}); } -TEST_F(ArithmeticFunctionsTest, testHash) { +// FIXME: This test errors on macs: +// velox/velox/functions/common/HashImpl.h:22:31: error: implicit instantiation +// of undefined template 'folly::hasher >, void>' +TEST_F(ArithmeticFunctionsTest, DISABLED_testHash) { StringView input("hi welcome"); evaluateAndCompare< RowTypeTrait, diff --git a/velox/experimental/codegen/functions/CMakeLists.txt b/velox/experimental/codegen/functions/CMakeLists.txt new file mode 100644 index 000000000000..78ddac047a24 --- /dev/null +++ b/velox/experimental/codegen/functions/CMakeLists.txt @@ -0,0 +1,14 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_library(velox_codegen_functions INTERFACE) +target_link_libraries(velox_codegen_functions INTERFACE ${FMT}) diff --git a/velox/experimental/codegen/tests/CMakeLists.txt b/velox/experimental/codegen/tests/CMakeLists.txt index acfcb65d96a6..6c716bbab19e 100644 --- a/velox/experimental/codegen/tests/CMakeLists.txt +++ b/velox/experimental/codegen/tests/CMakeLists.txt @@ -15,12 +15,12 @@ add_dependencies(velox_experimental_codegen_test velox_codegen_expression_test) target_link_libraries( velox_experimental_codegen_test ${GTEST_BOTH_LIBRARIES} - velox_codegen_utils_resource_path velox_core velox_exec velox_experimental_codegen velox_exec_test_lib velox_exec_test_util - velox_functions_common) + velox_functions_common + velox_codegen_utils_resource_path) add_test(velox_experimental_codegen_test velox_experimental_codegen_test) diff --git a/velox/experimental/codegen/vector_function/ComplexVectorReader.h b/velox/experimental/codegen/vector_function/ComplexVectorReader.h new file mode 100644 index 000000000000..fdd46beb286d --- /dev/null +++ b/velox/experimental/codegen/vector_function/ComplexVectorReader.h @@ -0,0 +1,194 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include "velox/buffer/Buffer.h" +#include "velox/common/base/Nulls.h" +#include "velox/experimental/codegen/vector_function/StringTypes.h" +#include "velox/experimental/codegen/vector_function/VectorReader-inl.h" +#include "velox/type/Type.h" +#include "velox/vector/ComplexVector.h" +#include "velox/vector/FlatVector.h" + +namespace facebook { +namespace velox { +namespace codegen { + +template +constexpr bool arrayReadableType = TypeTraits::isPrimitiveType; + +template +struct ComplexVectorReader {}; + +// Reader for ArrayVector(FlatVector) +// Use the templated Array<> type here instead of ARRAY typekind for +// static template expansion +template +struct ComplexVectorReader, Config> { + static_assert(arrayReadableType); + using ElementType = typename SQLElementType::NativeType::NativeType; + using ElementValueType = + typename VectorReader::ValueType; + using ElementInputType = + typename VectorReader::InputType; + using ValueType = std::optional>; + using InputType = std::optional>; + + explicit ComplexVectorReader(VectorPtr& vector) { + VELOX_CHECK(vector->type()->kind() == TypeKind::ARRAY); + arrayVectorPtr_ = std::dynamic_pointer_cast(vector); + VELOX_CHECK_NOT_NULL(arrayVectorPtr_); + } + + struct PointerType { + explicit PointerType( + ArrayVectorPtr& arrayVectorPtr, + size_t rowIndex, + vector_size_t offset) + : arrayVectorPtr_(arrayVectorPtr), + rowIndex_(rowIndex), + vectorReader_( + const_cast(arrayVectorPtr->elements()), + offset) {} + + inline bool has_value() { + return !arrayVectorPtr_->isNullAt(rowIndex_); + } + + inline std::optional value() { + return *(vectorReader_[0]); + } + + inline size_t size() { + if (!has_value()) { + throw std::logic_error("element has no value"); + } + return arrayVectorPtr_->sizeAt(rowIndex_); + } + + inline void setNullAndSize() { + static_assert(Config::isWriter_); + auto mutableNulls = arrayVectorPtr_->mutableRawNulls(); + auto oldNullCount = arrayVectorPtr_->getNullCount(); + if (oldNullCount.hasValue()) { + arrayVectorPtr_->setNullCount(oldNullCount.value() + 1); + } + bits::setBit(mutableNulls, rowIndex_, bits::kNull); + setSize(0); + } + + inline void setNotNullAndSize(vector_size_t size) { + static_assert(Config::isWriter_); + auto mutableNulls = arrayVectorPtr_->mutableRawNulls(); + auto oldNullCount = arrayVectorPtr_->getNullCount(); + if (oldNullCount.hasValue()) { + arrayVectorPtr_->setNullCount(oldNullCount.value() - 1); + } + bits::setBit(mutableNulls, rowIndex_, bits::kNotNull); + setSize(size); + } + + inline ElementType& operator*() { + return *(vectorReader_[0]); + } + + inline const ElementType& operator*() const { + return *(vectorReader_[0]); + } + + inline typename VectorReader::PointerType + operator[](size_t elementIndex) { + return vectorReader_[elementIndex]; + } + + inline PointerType& operator=(InputType& other) { + if (!other.has_value()) { + setNullAndSize(); + return *this; + } else { + auto val = other.value(); + setNotNullAndSize(val.size()); + for (size_t i = 0; i < val.size(); i++) { + vectorReader_[i] = val[i]; + } + return *this; + } + } + + operator ValueType const() { + if (!has_value()) { + return {}; + } else { + std::vector val(size()); + for (size_t i = 0; i < size(); i++) { + val.push_back(this[i]); + } + return val; + } + } + + private: + ArrayVectorPtr& arrayVectorPtr_; + size_t rowIndex_; + VectorReader vectorReader_; + + inline void setSize(vector_size_t size) { + // reserve metadata vectors and resize if needed + auto mutableSizes = + arrayVectorPtr_->mutableSizes(size)->asMutable(); + auto mutableOffsets = + arrayVectorPtr_->mutableOffsets(size)->asMutable(); + + // FIXME: this assumes that setSize() is called in sequential + // order + mutableSizes[rowIndex_] = size; + if (rowIndex_ == 0) { + mutableOffsets[rowIndex_] = 0; + } else { + mutableOffsets[rowIndex_] = + mutableOffsets[rowIndex_ - 1] + mutableSizes[rowIndex_ - 1]; + } + return; + } + }; + + inline PointerType operator[](size_t rowIndex) { + // We only support simple arrays for now + VELOX_CHECK_NOT_NULL( + arrayVectorPtr_->elements()->asFlatVector()); + vector_size_t offset; + if constexpr (Config::isWriter_) { + if (rowIndex == 0) { + offset = 0; + } else { + offset = arrayVectorPtr_->offsetAt(rowIndex - 1) + + arrayVectorPtr_->sizeAt(rowIndex - 1); + } + } else { + offset = arrayVectorPtr_->offsetAt(rowIndex); + } + return PointerType{arrayVectorPtr_, rowIndex, offset}; + } + + private: + ArrayVectorPtr arrayVectorPtr_; +}; + +} // namespace codegen +} // namespace velox +} // namespace facebook diff --git a/velox/experimental/codegen/vector_function/GeneratedVectorFunction-inl.h b/velox/experimental/codegen/vector_function/GeneratedVectorFunction-inl.h index deee84242b68..568ce99110cd 100644 --- a/velox/experimental/codegen/vector_function/GeneratedVectorFunction-inl.h +++ b/velox/experimental/codegen/vector_function/GeneratedVectorFunction-inl.h @@ -18,6 +18,7 @@ #include #include #include +#include "velox/experimental/codegen/vector_function/ComplexVectorReader.h" #include "velox/experimental/codegen/vector_function/ConcatExpression-inl.h" #include "velox/experimental/codegen/vector_function/Perf.h" #include "velox/experimental/codegen/vector_function/VectorReader-inl.h" diff --git a/velox/experimental/codegen/vector_function/VectorReader-inl.h b/velox/experimental/codegen/vector_function/VectorReader-inl.h index 0e28ffdaf48a..de725813307c 100644 --- a/velox/experimental/codegen/vector_function/VectorReader-inl.h +++ b/velox/experimental/codegen/vector_function/VectorReader-inl.h @@ -13,6 +13,7 @@ */ #pragma once +#include #include "velox/buffer/Buffer.h" #include "velox/common/base/Nulls.h" #include "velox/experimental/codegen/vector_function/StringTypes.h" @@ -33,7 +34,7 @@ namespace codegen { // // true means set to null, false means not null // static constexpr bool intializedWithNullSet_ // -// // when true, the reader will never reveive a null value to write +// // when true, the reader will never receive a null value to write // static constexpr bool mayWriteNull_ // // @@ -42,6 +43,8 @@ namespace codegen { // constexpr static bool constantStringBuffersShared = false; // }; +// TODO: add bounds check everywhere + // TODO: move readers to different directory /// Only support scalarType for now template @@ -69,7 +72,8 @@ struct VectorReader { // The type used for codegen expression inputs using ValueType = std::optional; using InputType = ValueType; - explicit VectorReader(VectorPtr& vector) { + explicit VectorReader(VectorPtr& vector, vector_size_t offset = 0) + : offset_(offset) { VELOX_CHECK_NOT_NULL( std::dynamic_pointer_cast>(vector)); VELOX_CHECK_EQ(vector->typeKind(), SQLType::NativeType::typeKind); @@ -78,28 +82,30 @@ struct VectorReader { if constexpr (Config::isWriter_) { mutableRawNulls_ = flatVector->mutableRawNulls(); + mutableRawValues_ = flatVector->mutableRawValues(); } else { if constexpr (Config::mayReadNull_) { // TODO when read only vector does not have nulls we dont need to // allocate nulls mutableRawNulls_ = flatVector->mutableRawNulls(); } - } - - if constexpr (Config::isWriter_) { - mutableRawValues_ = flatVector->mutableRawValues(); - } else { mutableRawValues_ = const_cast(flatVector->rawValues()); } } - uint64_t* mutableRawNulls_; NativeType* mutableRawValues_; + uint64_t* mutableRawNulls_; + vector_size_t offset_ = 0; struct PointerType { size_t rowIndex_; NativeType* mutableValues_; uint64_t* mutableNulls_; + vector_size_t offset_; + + inline size_t index() const { + return rowIndex_ + offset_; + } inline bool has_value() { // FIXME: generated code should avoid calling on this on the writer and @@ -110,26 +116,33 @@ struct VectorReader { } else { // read nullability - return !bits::isBitNull(mutableNulls_, rowIndex_); + return !bits::isBitNull(mutableNulls_, index()); } } + inline NativeType& value() { + if (!has_value()) { + throw std::logic_error("element has no value"); + } + return mutableValues_[index()]; + } + inline NativeType& operator*() { - return mutableValues_[rowIndex_]; + return mutableValues_[index()]; } inline const NativeType& operator*() const { - return mutableValues_[rowIndex_]; + return mutableValues_[index()]; } inline PointerType& operator=(const NativeType& other) { static_assert(Config::isWriter_); if constexpr (Config::intializedWithNullSet_) { - bits::setBit(mutableNulls_, rowIndex_, bits::kNotNull); + bits::setBit(mutableNulls_, index(), bits::kNotNull); } - mutableValues_[rowIndex_] = other; + mutableValues_[index()] = other; return *this; } @@ -144,7 +157,7 @@ struct VectorReader { *this = *other; } else { if constexpr (!Config::intializedWithNullSet_) { - bits::setBit(mutableNulls_, rowIndex_, bits::kNull); + bits::setBit(mutableNulls_, index(), bits::kNull); } } return *this; @@ -160,7 +173,11 @@ struct VectorReader { }; inline PointerType operator[](size_t rowIndex) { - return {rowIndex, this->mutableRawValues_, this->mutableRawNulls_}; + return { + rowIndex, + this->mutableRawValues_, + this->mutableRawNulls_, + this->offset_}; } }; @@ -176,34 +193,31 @@ struct VectorReader< Config, std:: enable_if_t> { - explicit VectorReader(VectorPtr& vector) { + explicit VectorReader(VectorPtr& vector, vector_size_t offset = 0) + : offset_(offset) { VELOX_CHECK(vector->type()->kind() == TypeKind::BOOLEAN); auto flatVector = vector->asFlatVector(); VELOX_CHECK_NOT_NULL(flatVector); if constexpr (Config::isWriter_) { mutableRawNulls_ = flatVector->mutableRawNulls(); + mutableRawValues_ = flatVector->template mutableRawValues(); } else { // TODO when read only vector does not have nulls we dont need to allocate // nulls if constexpr (Config::mayReadNull_) { mutableRawNulls_ = flatVector->mutableRawNulls(); } - } - - if constexpr (Config::isWriter_) { - mutableRawValues_ = flatVector->template mutableRawValues(); - } else { mutableRawValues_ = const_cast(flatVector->template rawValues()); } } // The type used for codegen expression inputs - using ValueType = const std::optional; + using ValueType = std::optional; using InputType = ValueType; struct ReferenceType { - size_t rowIndex_; + size_t index_; uint64_t* mutableValues_; uint64_t* mutableNulls_; @@ -211,15 +225,15 @@ struct VectorReader< static_assert(Config::isWriter_); if constexpr (Config::intializedWithNullSet_) { - bits::setBit(mutableNulls_, rowIndex_, bits::kNotNull); + bits::setBit(mutableNulls_, index_, bits::kNotNull); } - bits::setBit(mutableValues_, rowIndex_, other); + bits::setBit(mutableValues_, index_, other); return *this; } operator bool() const { - return bits::isBitSet(mutableValues_, rowIndex_); + return bits::isBitSet(mutableValues_, index_); } }; @@ -227,6 +241,11 @@ struct VectorReader< size_t rowIndex_; uint64_t* mutableValues_; uint64_t* mutableNulls_; + vector_size_t offset_; + + inline size_t index() { + return offset_ + rowIndex_; + } inline bool has_value() { static_assert(!Config::isWriter_); @@ -234,24 +253,32 @@ struct VectorReader< return true; } else { // read nullability - return !bits::isBitNull(mutableNulls_, rowIndex_); + return !bits::isBitNull(mutableNulls_, index()); } } + inline ReferenceType value() { + if (!has_value()) { + throw std::logic_error("element has no value"); + } + + return ReferenceType{index(), mutableValues_, mutableNulls_}; + } + inline ReferenceType operator*() { - return ReferenceType{rowIndex_, mutableValues_, mutableNulls_}; + return ReferenceType{index(), mutableValues_, mutableNulls_}; } inline const ReferenceType operator*() const { - return ReferenceType{rowIndex_, mutableValues_, mutableNulls_}; + return ReferenceType{index(), mutableValues_, mutableNulls_}; } inline PointerType& operator=(const bool& other) { static_assert(Config::isWriter_); if constexpr (Config::intializedWithNullSet_) { - bits::setBit(mutableNulls_, rowIndex_, bits::kNotNull); + bits::setBit(mutableNulls_, index(), bits::kNotNull); } - bits::setBit(mutableValues_, rowIndex_, other); + bits::setBit(mutableValues_, index(), other); return *this; } @@ -266,7 +293,7 @@ struct VectorReader< *this = *other; } else { if constexpr (!Config::intializedWithNullSet_) { - bits::setBit(mutableNulls_, rowIndex_, bits::kNull); + bits::setBit(mutableNulls_, index(), bits::kNull); } } return *this; @@ -283,12 +310,16 @@ struct VectorReader< inline PointerType operator[](size_t rowIndex) { return PointerType{ - rowIndex, this->mutableRawValues_, this->mutableRawNulls_}; + rowIndex, + this->mutableRawValues_, + this->mutableRawNulls_, + this->offset_}; } private: - uint64_t* mutableRawNulls_; uint64_t* mutableRawValues_; + uint64_t* mutableRawNulls_; + vector_size_t offset_ = 0; }; //**************************************************************************** @@ -303,25 +334,21 @@ struct VectorReader< std::enable_if_t< SQLType::NativeType::typeKind == TypeKind::VARCHAR, StringView>> { - explicit VectorReader(VectorPtr& vector) - : vector_(vector->template asFlatVector()) { + explicit VectorReader(VectorPtr& vector, vector_size_t offset = 0) + : vector_(vector->asFlatVector()), offset_(offset) { VELOX_CHECK(vector->type()->kind() == TypeKind::VARCHAR); auto flatVector = vector->asFlatVector(); VELOX_CHECK_NOT_NULL(flatVector); if constexpr (Config::isWriter_) { mutableRawNulls_ = flatVector->mutableRawNulls(); + mutableRawValues_ = flatVector->template mutableRawValues(); } else { // TODO when read only vector does not have nulls we dont need to allocate // nulls if constexpr (Config::mayReadNull_) { mutableRawNulls_ = flatVector->mutableRawNulls(); } - } - - if constexpr (Config::isWriter_) { - mutableRawValues_ = flatVector->template mutableRawValues(); - } else { mutableRawValues_ = const_cast(flatVector->template rawValues()); } @@ -337,15 +364,29 @@ struct VectorReader< StringProxy( FlatVector* vector, vector_size_t rowIndex, - StringView* mutableValues) - : vector_(vector), rowIndex_(rowIndex), mutableValues_(mutableValues) {} + StringView* mutableValues, + vector_size_t offset) + : vector_(vector), + rowIndex_(rowIndex), + mutableValues_(mutableValues), + offset_(offset) { + // We need to get a reference to the StringView to avoid inlined + // prefix + auto& string = vector->rawValues()[index()]; + setData(const_cast(string.data())); + setSize(string.size()); + } + + inline size_t index() const { + return offset_ + rowIndex_; + } void operator=(const InputReferenceString& other_) { static_assert(Config::isWriter_); auto& other = other_.get(); if constexpr (Config::inputStringBuffersShared) { - mutableValues_[rowIndex_] = other_; + mutableValues_[index()] = other_; } else { reserve(other.size()); if (other.size() != 0) { @@ -361,7 +402,7 @@ struct VectorReader< auto& other = other_.get(); if constexpr (Config::constantStringBuffersShared) { - mutableValues_[rowIndex_] = other_; + mutableValues_[index()] = other_; } else { reserve(other.size()); if (other.size() != 0) { @@ -437,7 +478,7 @@ struct VectorReader< if (buffer_) { buffer_->setSize(buffer_->size() + size()); } - mutableValues_[rowIndex_] = StringView(data(), size()); + mutableValues_[index()] = StringView(data(), size()); return; } @@ -480,20 +521,27 @@ struct VectorReader< int32_t rowIndex_; StringView* mutableValues_; + + vector_size_t offset_ = 0; }; struct PointerType { uint64_t* mutableNulls_; StringProxy proxy_; + vector_size_t offset_; inline bool has_value() { if constexpr (Config::mayReadNull_) { - return !bits::isBitNull(mutableNulls_, proxy_.rowIndex()); + return !bits::isBitNull(mutableNulls_, proxy_.index()); } else { return true; } } + inline StringProxy& value() { + return proxy_; + } + operator codegen::InputReferenceStringNullable const() { static_assert(!Config::isWriter_); @@ -502,14 +550,14 @@ struct VectorReader< } return codegen::InputReferenceStringNullable{ - InputReferenceString{proxy_.mutableValues()[proxy_.rowIndex()]}}; + InputReferenceString{proxy_.mutableValues()[proxy_.index()]}}; } inline PointerType& operator=(const InputReferenceStringNullable& other) { static_assert(Config::isWriter_); if constexpr (!Config::mayWriteNull_) { if constexpr (Config::intializedWithNullSet_) { - bits::setBit(mutableNulls_, proxy_.rowIndex(), !bits::kNull); + bits::setBit(mutableNulls_, proxy_.index(), !bits::kNull); } proxy_ = *other; @@ -517,13 +565,13 @@ struct VectorReader< // may have null if (other.has_value()) { if constexpr (Config::intializedWithNullSet_) { - bits::setBit(mutableNulls_, proxy_.rowIndex(), !bits::kNull); + bits::setBit(mutableNulls_, proxy_.index(), !bits::kNull); } proxy_ = *other; } else { if constexpr (!Config::intializedWithNullSet_) { - bits::setBit(mutableNulls_, proxy_.rowIndex(), bits::kNull); + bits::setBit(mutableNulls_, proxy_.index(), bits::kNull); } } } @@ -535,7 +583,7 @@ struct VectorReader< static_assert(Config::isWriter_); if constexpr (!Config::mayWriteNull_) { if constexpr (Config::intializedWithNullSet_) { - bits::setBit(mutableNulls_, proxy_.rowIndex(), bits::kNotNull); + bits::setBit(mutableNulls_, proxy_.index(), bits::kNotNull); } proxy_ = *other; @@ -543,13 +591,13 @@ struct VectorReader< // may have null if (other.has_value()) { if constexpr (Config::intializedWithNullSet_) { - bits::setBit(mutableNulls_, proxy_.rowIndex(), bits::kNotNull); + bits::setBit(mutableNulls_, proxy_.index(), bits::kNotNull); } proxy_ = *other; } else { if constexpr (!Config::intializedWithNullSet_) { - bits::setBit(mutableNulls_, proxy_.rowIndex(), bits::kNull); + bits::setBit(mutableNulls_, proxy_.index(), bits::kNull); } } } @@ -561,23 +609,23 @@ struct VectorReader< static_assert(Config::isWriter_); if constexpr (!Config::intializedWithNullSet_) { - bits::setBit(mutableNulls_, proxy_.rowIndex(), bits::kNull); + bits::setBit(mutableNulls_, proxy_.index(), bits::kNull); } return *this; } inline StringProxy& operator*() { - static_assert(Config::isWriter_); + // static_assert(Config::isWriter_); if constexpr (Config::intializedWithNullSet_) { - bits::setBit(mutableNulls_, proxy_.rowIndex(), bits::kNotNull); + bits::setBit(mutableNulls_, proxy_.index(), bits::kNotNull); } return proxy_; } inline const StringProxy& operator*() const { - static_assert(Config::isWriter_); + // static_assert(Config::isWriter_); if constexpr (Config::intializedWithNullSet_) { - bits::setBit(mutableNulls_, proxy_.rowIndex(), bits::kNotNull); + bits::setBit(mutableNulls_, proxy_.index(), bits::kNotNull); } return proxy_; } @@ -585,13 +633,16 @@ struct VectorReader< inline PointerType operator[](size_t rowIndex) { return PointerType{ - mutableRawNulls_, StringProxy(vector_, rowIndex, mutableRawValues_)}; + mutableRawNulls_, + StringProxy(vector_, rowIndex, mutableRawValues_, this->offset_), + this->offset_}; } private: - uint64_t* mutableRawNulls_; StringView* mutableRawValues_; + uint64_t* mutableRawNulls_; FlatVector* vector_; + vector_size_t offset_ = 0; }; } // namespace codegen diff --git a/velox/experimental/codegen/vector_function/tests/ArrayVectorReaderTest.cpp b/velox/experimental/codegen/vector_function/tests/ArrayVectorReaderTest.cpp new file mode 100644 index 000000000000..2f557282ca10 --- /dev/null +++ b/velox/experimental/codegen/vector_function/tests/ArrayVectorReaderTest.cpp @@ -0,0 +1,234 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include "velox/experimental/codegen/vector_function/GeneratedVectorFunction-inl.h" // NOLINT (CLANGTIDY ) +#include "velox/experimental/codegen/vector_function/tests/VectorReaderTestBase.h" +#include "velox/type/Type.h" + +namespace facebook::velox::codegen { + +TEST_F(ComplexVectorReaderTest, ReadArraySmallintVectors) { + /// ArrayVector>: + /// [ null, [0x0333, null, 0x1444], [0x1666, 0x0777, null, 0x0999] ] + /// size: 3 + /// offsets: [0, 0, 3] + /// lengths: [0, 3, 4] + /// nulls: 0b001 + /// elements: + /// FlatVector: + /// size: 7 + /// [0x0333, null, 0x1444, 0x1666, 0x0777, null, 0x0999] + /// nulls: 0b0100000 + size_t flatVectorSize = 7; + auto flatVector = + makeFlatVectorPtr(flatVectorSize, SMALLINT(), pool_.get()); + + size_t arrayVectorSize = 3; + auto arrayVector = makeArrayVectorPtr( + arrayVectorSize, pool_.get(), ARRAY(SMALLINT()), flatVector); + + ComplexVectorReader, OutputReaderConfig> + writer(arrayVector); + ComplexVectorReader, InputReaderConfig> + reader(arrayVector); + + writer[0].setNullAndSize(); + ASSERT_FALSE(reader[0].has_value()); + + writer[1].setNotNullAndSize(3); + writer[1][0] = 0x0333; + writer[1][1] = std::nullopt; + writer[1][2] = 0x1444; + ASSERT_EQ(reader[1][0].value(), 0x0333); + ASSERT_FALSE(reader[1][1].has_value()); + ASSERT_EQ(reader[1][2].value(), 0x1444); + ASSERT_EQ(reader[1].size(), 3); + + using InputType = ComplexVectorReader< + Array, + OutputReaderConfig>::InputType; + using ElementInputType = ComplexVectorReader< + Array, + OutputReaderConfig>::ElementInputType; + + InputType smallint2 = std::make_optional( + std::vector{0x1666, 0x0777, std::nullopt, 0x0999}); + writer[2] = smallint2; + ASSERT_EQ(reader[2][0].value(), 0x1666); + ASSERT_EQ(reader[2][1].value(), 0x0777); + ASSERT_FALSE(reader[2][2].has_value()); + ASSERT_EQ(reader[2][3].value(), 0x0999); +} + +TEST_F(ComplexVectorReaderTest, ReadArrayBoolVectors) { + /// ArrayVector>: + /// [ [true, false, null], null, [false, false, null, false], null] + /// size: 4 + /// offsets: [0, 3, 3, 7] + /// lengths: [3, 0, 4, 0] + /// nulls: 0b1010 + /// elements: + /// FlatVector: + /// size: 7 + /// [true, false, null, false, false, null, false] + /// nulls: 0b0100100 + size_t flatVectorSize = 7; + auto flatVector = + makeFlatVectorPtr(flatVectorSize, BOOLEAN(), pool_.get()); + + size_t arrayVectorSize = 4; + auto arrayVector = makeArrayVectorPtr( + arrayVectorSize, pool_.get(), ARRAY(BOOLEAN()), flatVector); + + ComplexVectorReader, OutputReaderConfig> + writer(arrayVector); + ComplexVectorReader, InputReaderConfig> + reader(arrayVector); + + // if we set elements one-by-one, we need to explicitly call + // setNotNullAndSize() + writer[0].setNotNullAndSize(3); + writer[0][0] = true; + writer[0][1] = false; + writer[0][2] = std::nullopt; + ASSERT_EQ(reader[0][0].value(), true); + ASSERT_EQ(reader[0][1].value(), false); + ASSERT_FALSE(reader[0][2].has_value()); + ASSERT_EQ(reader[0].size(), 3); + + writer[1].setNullAndSize(); + ASSERT_FALSE(reader[1].has_value()); + EXPECT_THROW(reader[1].size(), std::logic_error); + + using InputType = ComplexVectorReader< + Array, + OutputReaderConfig>::InputType; + using ElementInputType = ComplexVectorReader< + Array, + OutputReaderConfig>::ElementInputType; + + InputType bool2 = std::make_optional( + std::vector{false, false, std::nullopt, false}); + writer[2] = bool2; + ASSERT_EQ(reader[2][0].value(), false); + ASSERT_EQ(reader[2][1].value(), false); + ASSERT_FALSE(reader[2][2].has_value()); + ASSERT_EQ(reader[2][3].value(), false); + ASSERT_EQ(reader[2].size(), 4); + + InputType bool3 = std::nullopt; + writer[3] = bool3; + ASSERT_FALSE(reader[3].has_value()); + EXPECT_THROW(reader[3].size(), std::logic_error); +} + +TEST_F(ComplexVectorReaderTest, ReadArrayStringVectors) { + /// ArrayVector>: + /// [ hello, longString, emptyString, null ], [null, world], null, null] + /// size: 4 + /// offsets: [0, 4, 6, 6] + /// lengths: [4, 2, 0, 0] + /// nulls: 0b1100 + /// elements: + /// FlatVector: + /// size: 6 + /// [ hello, longString, emptyString, null, null, world] + /// nulls: 0b011000 + + auto helloRef = facebook::velox::StringView(u8"Hello", 5); + InputReferenceStringNullable hello{InputReferenceString(helloRef)}; + auto longStringRef = + StringView(u8"This is a rather long string. Quite long indeed.", 49); + InputReferenceStringNullable longString{InputReferenceString(longStringRef)}; + auto emptyStringRef = StringView(u8"", 0); + InputReferenceStringNullable emptyString{ + InputReferenceString(emptyStringRef)}; + auto worldRef = StringView(u8"World", 5); + InputReferenceStringNullable world{InputReferenceString(worldRef)}; + + size_t flatVectorSize = 6; + auto flatVector = + makeFlatVectorPtr(flatVectorSize, VARCHAR(), pool_.get()); + + size_t arrayVectorSize = 4; + auto arrayVector = makeArrayVectorPtr( + arrayVectorSize, pool_.get(), ARRAY(VARCHAR()), flatVector); + + ComplexVectorReader, OutputReaderConfig> + writer(arrayVector); + ComplexVectorReader, InputReaderConfig> + reader(arrayVector); + + writer[0].setNotNullAndSize(4); + writer[0][0] = hello; + writer[0][1] = longString; + writer[0][2] = emptyString; + writer[0][3] = std::nullopt; + + ASSERT_TRUE(reader[0].has_value()); + + ASSERT_TRUE(reader[0][0].has_value()); + ASSERT_EQ(reader[0][0].value().size(), 5); + ASSERT_TRUE(gtestMemcmp( + (*reader[0][0]).data(), (void*)helloRef.data(), (*reader[0][0]).size())); + + ASSERT_TRUE(reader[0][1].has_value()); + ASSERT_EQ(reader[0][1].value().size(), 49); + ASSERT_TRUE(gtestMemcmp( + (*reader[0][1]).data(), + (void*)longStringRef.data(), + (*reader[0][1]).size())); + + ASSERT_TRUE(reader[0][2].has_value()); + ASSERT_EQ(reader[0][2].value().size(), 0); + ASSERT_TRUE(gtestMemcmp( + (*reader[0][2]).data(), (void*)helloRef.data(), (*reader[0][2]).size())); + + ASSERT_FALSE(reader[0][3].has_value()); + + using InputType = ComplexVectorReader< + Array, + OutputReaderConfig>::InputType; + using ElementInputType = ComplexVectorReader< + Array, + OutputReaderConfig>::ElementInputType; + InputType string1 = std::make_optional( + std::vector{InputReferenceStringNullable{}, world}); + writer[1] = string1; + + ASSERT_TRUE(reader[1].has_value()); + + ASSERT_FALSE(reader[1][0].has_value()); + + ASSERT_TRUE(reader[1][1].has_value()); + ASSERT_EQ(reader[1][1].value().size(), 5); + ASSERT_TRUE(gtestMemcmp( + (*reader[1][1]).data(), (void*)worldRef.data(), (*reader[1][1]).size())); + + writer[2].setNullAndSize(); + ASSERT_FALSE(reader[2].has_value()); + EXPECT_THROW(reader[2].size(), std::logic_error); + + InputType val = std::nullopt; + writer[3] = val; + ASSERT_FALSE(reader[3].has_value()); + EXPECT_THROW(reader[3].size(), std::logic_error); +} + +} // namespace facebook::velox::codegen diff --git a/velox/experimental/codegen/vector_function/tests/CMakeLists.txt b/velox/experimental/codegen/vector_function/tests/CMakeLists.txt index 41a891e5c977..6d6d7dd50ce9 100644 --- a/velox/experimental/codegen/vector_function/tests/CMakeLists.txt +++ b/velox/experimental/codegen/vector_function/tests/CMakeLists.txt @@ -10,8 +10,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -add_executable(velox_codegen_vector_function_test - veloxCodegenVectorFunctionTest.cpp, TempStringTest.cpp) +add_executable( + velox_codegen_vector_function_test CodegenVectorFunctionTest.cpp + TempStringTest.cpp VectorReaderTest.cpp) add_test(velox_codegen_vector_function_test velox_codegen_vector_function_test) target_link_libraries( @@ -37,7 +38,7 @@ target_link_libraries( velox_dwio_type_fbhive velox_dwrf_test_utils velox_presto_serializer - velox_transform + velox_transform_utils ${ANTLR4_RUNTIME} ${Boost_ATOMIC_LIBRARIES} ${Boost_CONTEXT_LIBRARIES} diff --git a/velox/experimental/codegen/vector_function/tests/CodegenVectorFunctionTest.cpp b/velox/experimental/codegen/vector_function/tests/CodegenVectorFunctionTest.cpp index 7319d41f7586..40d539798e3e 100644 --- a/velox/experimental/codegen/vector_function/tests/CodegenVectorFunctionTest.cpp +++ b/velox/experimental/codegen/vector_function/tests/CodegenVectorFunctionTest.cpp @@ -76,38 +76,6 @@ TEST(TestConcat, BasicConcatRow) { EXPECT_EQ(std::get<1>(output), *std::get<0>(args) - *std::get<1>(args)); } -TEST(VectorReader, ReadDoublesVectors) { - const size_t vectorSize = 1000; - auto inRowType = ROW({"columnA", "columnB"}, {DOUBLE(), DOUBLE()}); - auto outRowType = ROW({"expr1", "expr2"}, {DOUBLE(), DOUBLE()}); - - auto pool_ = memory::getDefaultScopedMemoryPool(); - auto pool = pool_.get(); - auto inRowVector = BaseVector::create(inRowType, vectorSize, pool); - auto outRowVector = BaseVector::create(outRowType, vectorSize, pool); - - VectorPtr& in1 = inRowVector->as()->childAt(0); - - SelectivityVector selectivityVector(vectorSize); - selectivityVector.setAll(); - in1->resize(vectorSize); - in1->addNulls(nullptr, selectivityVector); - VectorReader> writer(in1); - VectorReader> reader(in1); - - for (size_t row = 0; row < vectorSize; row++) { - writer[row] = (double)row; - } - - for (size_t row = 0; row < vectorSize; row++) { - ASSERT_DOUBLE_EQ((double)row, *reader[row]); - } - - for (size_t row = 0; row < vectorSize; row++) { - ASSERT_DOUBLE_EQ(*reader[row], in1->asFlatVector()->valueAt(row)); - } -} - TEST(TestConcat, EvalConcatFunction) { const size_t rowLength = 1000; SelectivityVector rows(rowLength); @@ -222,48 +190,6 @@ struct GeneratedVectorFunctionConfigBool { static constexpr bool isDefaultNullStrict = false; }; -TEST(VectorReader, ReadBoolVectors) { - // TODO: Move those to test class - auto pool_ = memory::getDefaultScopedMemoryPool(); - auto pool = pool_.get(); - const size_t vectorSize = 1000; - - auto inRowType = ROW({"columnA", "columnB"}, {BOOLEAN(), BOOLEAN()}); - auto outRowType = ROW({"expr1", "expr2"}, {BOOLEAN(), BOOLEAN()}); - - auto inRowVector = BaseVector::create(inRowType, vectorSize, pool); - auto outRowVector = BaseVector::create(outRowType, vectorSize, pool); - - VectorPtr& inputVector = inRowVector->as()->childAt(0); - inputVector->resize(vectorSize); - VectorReader> reader( - inputVector); - VectorReader> writer( - inputVector); - - for (size_t row = 0; row < vectorSize; row++) { - writer[row] = row % 2 == 0; - } - - // Check that writing of values to the reader was success - for (size_t row = 0; row < vectorSize; row++) { - ASSERT_DOUBLE_EQ((row % 2 == 0), *reader[row]); - ASSERT_DOUBLE_EQ( - (row % 2 == 0), inputVector->asFlatVector()->valueAt(row)); - } - - // Write a null at even indices - for (size_t row = 0; row < vectorSize; row++) { - if (row % 2) { - writer[row] = std::nullopt; - } - } - - for (size_t row = 0; row < vectorSize; row++) { - ASSERT_EQ(inputVector->asFlatVector()->isNullAt(row), row % 2); - } -} - TEST(TestBooEvalVectorFunction, EvalBoolExpression) { // TODO: Move those to test class auto pool_ = memory::getDefaultScopedMemoryPool(); diff --git a/velox/experimental/codegen/vector_function/tests/VectorReaderTest.cpp b/velox/experimental/codegen/vector_function/tests/VectorReaderTest.cpp new file mode 100644 index 000000000000..a1dafe8aacf9 --- /dev/null +++ b/velox/experimental/codegen/vector_function/tests/VectorReaderTest.cpp @@ -0,0 +1,134 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include "velox/experimental/codegen/vector_function/GeneratedVectorFunction-inl.h" // NOLINT (CLANGTIDY ) +#include "velox/experimental/codegen/vector_function/tests/VectorReaderTestBase.h" +#include "velox/type/Type.h" +#include "velox/vector/tests/VectorMaker.h" + +namespace facebook::velox::codegen { + +TEST_F(VectorReaderTestBase, ReadDoublesVectors) { + const size_t vectorSize = 1000; + auto inRowType = ROW({"columnA", "columnB"}, {DOUBLE(), DOUBLE()}); + auto inRowVector = BaseVector::create(inRowType, vectorSize, pool_.get()); + + VectorPtr& in1 = inRowVector->as()->childAt(0); + + SelectivityVector selectivityVector(vectorSize); + selectivityVector.setAll(); + in1->resize(vectorSize); + in1->addNulls(nullptr, selectivityVector); + VectorReader> writer(in1); + VectorReader> reader(in1); + + for (size_t row = 0; row < vectorSize; row++) { + writer[row] = (double)row; + } + + for (size_t row = 0; row < vectorSize; row++) { + ASSERT_DOUBLE_EQ((double)row, *reader[row]); + } + + for (size_t row = 0; row < vectorSize; row++) { + ASSERT_DOUBLE_EQ(*reader[row], in1->asFlatVector()->valueAt(row)); + } +} + +TEST_F(VectorReaderTestBase, ReadBoolVectors) { + const size_t vectorSize = 1000; + auto inRowType = ROW({"columnA", "columnB"}, {BOOLEAN(), BOOLEAN()}); + + auto inRowVector = BaseVector::create(inRowType, vectorSize, pool_.get()); + + VectorPtr& inputVector = inRowVector->as()->childAt(0); + inputVector->resize(vectorSize); + VectorReader> reader( + inputVector); + VectorReader> writer( + inputVector); + + for (size_t row = 0; row < vectorSize; row++) { + writer[row] = row % 2 == 0; + } + + // Check that writing of values to the reader was success + for (size_t row = 0; row < vectorSize; row++) { + ASSERT_DOUBLE_EQ((row % 2 == 0), *reader[row]); + ASSERT_DOUBLE_EQ( + (row % 2 == 0), inputVector->asFlatVector()->valueAt(row)); + } + + // Write a null at even indices + for (size_t row = 0; row < vectorSize; row++) { + if (row % 2) { + writer[row] = std::nullopt; + } + } + + for (size_t row = 0; row < vectorSize; row++) { + ASSERT_EQ(inputVector->asFlatVector()->isNullAt(row), row % 2); + } +} + +TEST_F(VectorReaderTestBase, ReadStringVectors) { + const size_t vectorSize = 4; + auto inRowType = ROW({"columnA"}, {VARCHAR()}); + + auto inRowVector = BaseVector::create(inRowType, vectorSize, pool_.get()); + + VectorPtr& inputVector = inRowVector->as()->childAt(0); + inputVector->resize(vectorSize); + + VectorReader> writer( + inputVector); + VectorReader> reader( + inputVector); + + auto helloWorldRef = facebook::velox::StringView(u8"Hello, World!", 13); + InputReferenceStringNullable helloWorld{InputReferenceString(helloWorldRef)}; + auto emptyStringRef = StringView(u8"", 0); + InputReferenceStringNullable emptyString{ + InputReferenceString(emptyStringRef)}; + auto inlineRef = StringView(u8"INLINE", 6); + InputReferenceStringNullable inlineString{InputReferenceString(inlineRef)}; + + writer[0] = helloWorld; + writer[1] = emptyString; + writer[2] = std::nullopt; + writer[3] = inlineString; + + ASSERT_TRUE(reader[0].has_value()); + ASSERT_EQ(reader[0].value().size(), 13); + ASSERT_TRUE(gtestMemcmp( + (*reader[0]).data(), (void*)"Hello, World!", (*reader[0]).size())); + + ASSERT_TRUE(reader[1].has_value()); + ASSERT_EQ(reader[1].value().size(), 0); + ASSERT_TRUE(gtestMemcmp((*reader[1]).data(), (void*)"", (*reader[1]).size())); + + ASSERT_FALSE(reader[2].has_value()); + + ASSERT_TRUE(reader[3].has_value()); + ASSERT_EQ(reader[3].value().size(), 6); + ASSERT_TRUE( + gtestMemcmp((*reader[3]).data(), (void*)"INLINE", (*reader[3]).size())); +} + +} // namespace facebook::velox::codegen diff --git a/velox/experimental/codegen/vector_function/tests/VectorReaderTestBase.h b/velox/experimental/codegen/vector_function/tests/VectorReaderTestBase.h new file mode 100644 index 000000000000..4b3c8ff4be5d --- /dev/null +++ b/velox/experimental/codegen/vector_function/tests/VectorReaderTestBase.h @@ -0,0 +1,73 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "velox/experimental/codegen/vector_function/ComplexVectorReader.h" +#include "velox/type/Type.h" +#include "velox/vector/BaseVector.h" + +namespace facebook::velox::codegen { + +class VectorReaderTestBase : public ::testing::Test { + protected: + std::unique_ptr pool_ = + memory::getDefaultScopedMemoryPool(); + + testing::AssertionResult gtestMemcmp(void* lhs, void* rhs, size_t count) { + return std::memcmp(lhs, rhs, count) ? testing::AssertionFailure() + : testing::AssertionSuccess(); + } +}; + +class ComplexVectorReaderTest : public VectorReaderTestBase { + protected: + template + VectorPtr makeFlatVectorPtr( + size_t flatVectorSize, + const TypePtr type, + memory::MemoryPool* pool) { + auto vector = BaseVector::create(type, flatVectorSize, pool); + return vector; + } + + VectorPtr makeArrayVectorPtr( + size_t arrayVectorSize, + memory::MemoryPool* pool, + const TypePtr type, + VectorPtr elements) { + BufferPtr offsets = AlignedBuffer::allocate(arrayVectorSize, pool); + auto* offsetsPtr = offsets->asMutable(); + BufferPtr lengths = + AlignedBuffer::allocate(arrayVectorSize, pool); + auto* lengthsPtr = lengths->asMutable(); + BufferPtr nulls = + AlignedBuffer::allocate(bits::nbytes(arrayVectorSize), pool); + auto* nullsPtr = nulls->asMutable(); + + size_t nullCount = 0; + + return std::make_shared( + pool, + type, + nulls, + arrayVectorSize, + offsets, + lengths, + elements, + nullCount); + } +}; + +} // namespace facebook::velox::codegen