Skip to content

Commit

Permalink
[refactor](storage) Expressing the types of computation layer and sto…
Browse files Browse the repository at this point in the history
…rage layer in PrimitiveTypeTraits
  • Loading branch information
mrhhsg committed Nov 1, 2023
1 parent 6de1cc7 commit f088cab
Show file tree
Hide file tree
Showing 18 changed files with 296 additions and 247 deletions.
41 changes: 2 additions & 39 deletions be/src/exprs/bloom_filter_func.h
Original file line number Diff line number Diff line change
Expand Up @@ -416,37 +416,10 @@ struct FixedStringFindOp : public StringFindOp {
}
};

// Find operation whose find_olap_engine() decodes a 64-bit value with
// VecDateTimeValue::from_olap_datetime() before probing the bloom filter.
struct DateTimeFindOp : public CommonFindOp<VecDateTimeValue> {
    // Reads `data` as a uint64_t, converts it with from_olap_datetime(), and
    // tests the raw bytes of the resulting VecDateTimeValue against
    // `bloom_filter`. Returns whatever bloom_filter.test() reports.
    bool find_olap_engine(const BloomFilterAdaptor& bloom_filter, const void* data) const {
        const uint64_t olap_datetime = *reinterpret_cast<const uint64_t*>(data);

        VecDateTimeValue decoded;
        decoded.from_olap_datetime(olap_datetime);

        return bloom_filter.test(Slice(reinterpret_cast<char*>(&decoded), sizeof(decoded)));
    }
};

// avoid violating C/C++ aliasing rules.
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101684

// Find operation whose find_olap_engine() decodes a uint24_t value with
// VecDateTimeValue::from_olap_date() before probing the bloom filter.
struct DateFindOp : public CommonFindOp<VecDateTimeValue> {
    // Copies the uint24_t stored at `data`, widens it through uint32_t to
    // uint64_t, converts it with from_olap_date(), and tests the raw bytes of
    // the resulting VecDateTimeValue against `bloom_filter`.
    bool find_olap_engine(const BloomFilterAdaptor& bloom_filter, const void* data) const {
        // Take a local copy of the packed value first, then widen the copy.
        uint24_t packed_date = *static_cast<const uint24_t*>(data);
        uint64_t olap_date = static_cast<uint32_t>(packed_date);

        VecDateTimeValue decoded;
        decoded.from_olap_date(olap_date);

        return bloom_filter.test(Slice(reinterpret_cast<char*>(&decoded), sizeof(decoded)));
    }
};

struct DecimalV2FindOp : public CommonFindOp<DecimalV2Value> {
bool find_olap_engine(const BloomFilterAdaptor& bloom_filter, const void* data) const {
auto packed_decimal = *static_cast<const decimal12_t*>(data);
DecimalV2Value value;
int64_t int_value = packed_decimal.integer;
int32_t frac_value = packed_decimal.fraction;
value.from_olap_decimal(int_value, frac_value);

// Predicate column using decimalv2value as column value type, so not need convert here
DecimalV2Value value = *static_cast<const DecimalV2Value*>(data);
constexpr int decimal_value_sz = sizeof(DecimalV2Value);
char data_bytes[decimal_value_sz];
memcpy(&data_bytes, &value, decimal_value_sz);
Expand All @@ -460,16 +433,6 @@ struct BloomFilterTypeTraits {
using FindOp = CommonFindOp<T>;
};

// Specialization for TYPE_DATE: selects DateFindOp as the FindOp instead of
// the CommonFindOp<T> used by the primary template.
template <>
struct BloomFilterTypeTraits<TYPE_DATE> {
using FindOp = DateFindOp;
};

// Specialization for TYPE_DATETIME: selects DateTimeFindOp as the FindOp
// instead of the CommonFindOp<T> used by the primary template.
template <>
struct BloomFilterTypeTraits<TYPE_DATETIME> {
using FindOp = DateTimeFindOp;
};

template <>
struct BloomFilterTypeTraits<TYPE_DECIMALV2> {
using FindOp = DecimalV2FindOp;
Expand Down
29 changes: 21 additions & 8 deletions be/src/olap/column_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,27 @@ enum class PredicateType {
MATCH = 13, // fulltext match
};

// Decodes the value stored at `data_ptr` into a `ResultType`.
//
// Template parameters:
//   primitive_type - selects how the bytes at `data_ptr` are interpreted.
//   ResultType     - the type returned to the caller; for TYPE_DATE and
//                    TYPE_DATETIME it must be VecDateTimeValue.
//
// Behaviour per type:
//   TYPE_DECIMALV2 - the stored representation (decimal12_t) differs from the
//                    predicate/compute type, so a decimal12_t is copied out
//                    and converted with from_olap_decimal(integer, fraction).
//   TYPE_DATE      - the bytes are read as a uint24_t and converted with
//                    from_olap_date().
//   TYPE_DATETIME  - the bytes are read as a uint64_t and converted with
//                    from_olap_datetime().
//   anything else  - sizeof(ResultType) bytes are copied verbatim.
//
// Multi-byte values are copied out with memcpy instead of being read through
// a cast pointer, so `data_ptr` does not have to be suitably aligned.
template <PrimitiveType primitive_type, typename ResultType>
ResultType get_zone_map_value(void* data_ptr) {
    ResultType res;
    if constexpr (primitive_type == PrimitiveType::TYPE_DECIMALV2) {
        decimal12_t decimal_12_t_value;
        memcpy(reinterpret_cast<char*>(&decimal_12_t_value), data_ptr, sizeof(decimal12_t));
        res.from_olap_decimal(decimal_12_t_value.integer, decimal_12_t_value.fraction);
    } else if constexpr (primitive_type == PrimitiveType::TYPE_DATE) {
        static_assert(std::is_same_v<ResultType, VecDateTimeValue>);
        // NOTE(review): still read through a cast pointer; presumably safe
        // because uint24_t is a byte-aligned 3-byte type - confirm.
        res.from_olap_date(*reinterpret_cast<uint24_t*>(data_ptr));
    } else if constexpr (primitive_type == PrimitiveType::TYPE_DATETIME) {
        static_assert(std::is_same_v<ResultType, VecDateTimeValue>);
        // Copy the 8 bytes out rather than dereferencing a uint64_t*: the
        // address may not be 8-byte aligned.
        uint64_t olap_datetime = 0;
        memcpy(&olap_datetime, data_ptr, sizeof(olap_datetime));
        res.from_olap_datetime(olap_datetime);
    } else {
        memcpy(&res, data_ptr, sizeof(ResultType));
    }
    return res;
}

inline std::string type_to_string(PredicateType type) {
switch (type) {
case PredicateType::UNKNOWN:
Expand Down Expand Up @@ -264,14 +285,6 @@ class ColumnPredicate {
}

protected:
// Returns the T stored at `data_ptr`. The bytes are copied into a local with
// memcpy instead of being read through a cast pointer, which avoids a crash
// when the address is not aligned for T.
template <class T>
T _get_zone_map_value(void* data_ptr) const {
    T value;
    memcpy(&value, data_ptr, sizeof(value));
    return value;
}

virtual std::string _debug_string() const = 0;

uint32_t _column_id;
Expand Down
67 changes: 47 additions & 20 deletions be/src/olap/comparison_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ namespace doris {
template <PrimitiveType Type, PredicateType PT>
class ComparisonPredicateBase : public ColumnPredicate {
public:
using T = typename PredicatePrimitiveTypeTraits<Type>::PredicateFieldType;
using T = typename PrimitiveTypeTraits<Type>::CppType;
ComparisonPredicateBase(uint32_t column_id, const T& value, bool opposite = false)
: ColumnPredicate(column_id, opposite),
_cached_code(_InvalidateCodeValue),
Expand Down Expand Up @@ -67,7 +67,20 @@ class ComparisonPredicateBase : public ColumnPredicate {

roaring::Roaring roaring;
bool exact_match = false;
Status status = iterator->seek_dictionary(&_value, &exact_match);

Status status;
if constexpr (Type == TYPE_DATE) {
uint24_t value = _value.to_olap_date();
status = iterator->seek_dictionary(&value, &exact_match);
} else if constexpr (Type == TYPE_DATETIME) {
auto value = _value.to_olap_datetime();
status = iterator->seek_dictionary(&value, &exact_match);
} else if constexpr (Type == TYPE_DECIMALV2) {
decimal12_t value {_value.int_value(), _value.frac_value()};
status = iterator->seek_dictionary(&value, &exact_match);
} else {
status = iterator->seek_dictionary(&_value, &exact_match);
}
rowid_t seeked_ordinal = iterator->current_ordinal();

return _bitmap_compare(status, exact_match, ordinal_limit, seeked_ordinal, iterator,
Expand Down Expand Up @@ -107,8 +120,22 @@ class ComparisonPredicateBase : public ColumnPredicate {
}

roaring::Roaring roaring;
RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, &_value, query_type,
num_rows, &roaring));
if constexpr (Type == PrimitiveType::TYPE_DATE) {
uint24_t date = _value.to_olap_date();
RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, &date, query_type,
num_rows, &roaring));
} else if constexpr (Type == PrimitiveType::TYPE_DATETIME) {
uint64_t date = _value.to_olap_datetime();
RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, &date, query_type,
num_rows, &roaring));
} else if constexpr (Type == TYPE_DECIMALV2) {
decimal12_t value {_value.int_value(), _value.frac_value()};
RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, &value, query_type,
num_rows, &roaring));
} else {
RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, &_value, query_type,
num_rows, &roaring));
}

// mask out null_bitmap, since NULL cmp VALUE will produce NULL
// and be treated as false in WHERE
Expand Down Expand Up @@ -150,17 +177,13 @@ class ComparisonPredicateBase : public ColumnPredicate {
_evaluate_bit<true>(column, sel, size, flags);
}

using WarpperFieldType = std::conditional_t<Type == TYPE_DATE, uint24_t, T>;

bool evaluate_and(const std::pair<WrapperField*, WrapperField*>& statistic) const override {
if (statistic.first->is_null()) {
return true;
}

T tmp_min_value {};
T tmp_max_value {};
memcpy((char*)(&tmp_min_value), statistic.first->cell_ptr(), sizeof(WarpperFieldType));
memcpy((char*)(&tmp_max_value), statistic.second->cell_ptr(), sizeof(WarpperFieldType));
T tmp_min_value = get_zone_map_value<Type, T>(statistic.first->cell_ptr());
T tmp_max_value = get_zone_map_value<Type, T>(statistic.second->cell_ptr());

if constexpr (PT == PredicateType::EQ) {
return _operator(tmp_min_value <= _value && tmp_max_value >= _value, true);
Expand All @@ -183,10 +206,8 @@ class ComparisonPredicateBase : public ColumnPredicate {
<< " Type: " << Type << " sizeof(T): " << sizeof(T)
<< " statistic.first->size(): " << statistic.first->size();

T tmp_min_value {};
T tmp_max_value {};
memcpy((char*)(&tmp_min_value), statistic.first->cell_ptr(), sizeof(WarpperFieldType));
memcpy((char*)(&tmp_max_value), statistic.second->cell_ptr(), sizeof(WarpperFieldType));
T tmp_min_value = get_zone_map_value<Type, T>(statistic.first->cell_ptr());
T tmp_max_value = get_zone_map_value<Type, T>(statistic.second->cell_ptr());

if constexpr (PT == PredicateType::LT) {
return _value > tmp_max_value;
Expand All @@ -206,10 +227,8 @@ class ComparisonPredicateBase : public ColumnPredicate {
return false;
}

T tmp_min_value {};
T tmp_max_value {};
memcpy((char*)(&tmp_min_value), statistic.first->cell_ptr(), sizeof(WarpperFieldType));
memcpy((char*)(&tmp_max_value), statistic.second->cell_ptr(), sizeof(WarpperFieldType));
T tmp_min_value = get_zone_map_value<Type, T>(statistic.first->cell_ptr());
T tmp_max_value = get_zone_map_value<Type, T>(statistic.second->cell_ptr());

if constexpr (PT == PredicateType::EQ) {
return tmp_min_value == _value && tmp_max_value == _value;
Expand All @@ -232,8 +251,16 @@ class ComparisonPredicateBase : public ColumnPredicate {
if constexpr (std::is_same_v<T, StringRef>) {
return bf->test_bytes(_value.data, _value.size);
} else {
return bf->test_bytes(const_cast<char*>(reinterpret_cast<const char*>(&_value)),
sizeof(WarpperFieldType));
// DecimalV2 using decimal12_t in bloom filter, should convert value to decimal12_t
if constexpr (Type == PrimitiveType::TYPE_DECIMALV2) {
decimal12_t decimal12_t_val(_value.int_value(), _value.frac_value());
return bf->test_bytes(
const_cast<char*>(reinterpret_cast<const char*>(&decimal12_t_val)),
sizeof(decimal12_t));
} else {
return bf->test_bytes(const_cast<char*>(reinterpret_cast<const char*>(&_value)),
sizeof(T));
}
}
} else {
LOG(FATAL) << "Bloom filter is not supported by predicate type.";
Expand Down
2 changes: 2 additions & 0 deletions be/src/olap/decimal12.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ struct decimal12_t {
return std::string(buf);
}

// Do not modify this structure: ZoneMap uses from_string and to_string
// to serialize DecimalV2 values to segment files.
Status from_string(const std::string& str) {
integer = 0;
fraction = 0;
Expand Down
56 changes: 21 additions & 35 deletions be/src/olap/in_list_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,7 @@ struct std::equal_to<doris::StringRef> {
return lhs == rhs;
}
};
// for decimal12_t
// std::hash specialization for doris::decimal12_t: hashes the `integer` and
// `fraction` members separately and combines the two results with XOR.
template <>
struct std::hash<doris::decimal12_t> {
    int64_t operator()(const doris::decimal12_t& rhs) const {
        const auto integer_hash = hash<int64_t>()(rhs.integer);
        const auto fraction_hash = hash<int32_t>()(rhs.fraction);
        return integer_hash ^ fraction_hash;
    }
};

// std::equal_to specialization for doris::decimal12_t: two values compare
// equal exactly when decimal12_t's own operator== says so.
template <>
struct std::equal_to<doris::decimal12_t> {
bool operator()(const doris::decimal12_t& lhs, const doris::decimal12_t& rhs) const {
return lhs == rhs;
}
};
// for uint24_t
template <>
struct std::hash<doris::uint24_t> {
Expand Down Expand Up @@ -83,7 +70,7 @@ namespace doris {
template <PrimitiveType Type, PredicateType PT, typename HybridSetType>
class InListPredicateBase : public ColumnPredicate {
public:
using T = typename PredicatePrimitiveTypeTraits<Type>::PredicateFieldType;
using T = typename PrimitiveTypeTraits<Type>::CppType;
template <typename ConditionType, typename ConvertFunc>
InListPredicateBase(uint32_t column_id, const ConditionType& conditions,
const ConvertFunc& convert, bool is_opposite = false,
Expand Down Expand Up @@ -134,9 +121,8 @@ class InListPredicateBase : public ColumnPredicate {
} else if constexpr (Type == TYPE_DECIMALV2) {
HybridSetBase::IteratorBase* iter = hybrid_set->begin();
while (iter->has_next()) {
const DecimalV2Value* value = (const DecimalV2Value*)(iter->get_value());
decimal12_t decimal12 = {value->int_value(), value->frac_value()};
_values->insert(&decimal12);
const auto* value = (const DecimalV2Value*)(iter->get_value());
_values->insert(value);
iter->next();
}
} else if constexpr (Type == TYPE_DATE) {
Expand Down Expand Up @@ -329,18 +315,8 @@ class InListPredicateBase : public ColumnPredicate {
return true;
}
if constexpr (PT == PredicateType::IN_LIST) {
if constexpr (Type == TYPE_DATE) {
T tmp_min_uint32_value = 0;
memcpy((char*)(&tmp_min_uint32_value), statistic.first->cell_ptr(),
sizeof(uint24_t));
T tmp_max_uint32_value = 0;
memcpy((char*)(&tmp_max_uint32_value), statistic.second->cell_ptr(),
sizeof(uint24_t));
return tmp_min_uint32_value <= _max_value && tmp_max_uint32_value >= _min_value;
} else {
return _get_zone_map_value<T>(statistic.first->cell_ptr()) <= _max_value &&
_get_zone_map_value<T>(statistic.second->cell_ptr()) >= _min_value;
}
return get_zone_map_value<Type, T>(statistic.first->cell_ptr()) <= _max_value &&
get_zone_map_value<Type, T>(statistic.second->cell_ptr()) >= _min_value;
} else {
return true;
}
Expand All @@ -363,16 +339,16 @@ class InListPredicateBase : public ColumnPredicate {
}
if constexpr (PT == PredicateType::NOT_IN_LIST) {
if constexpr (Type == TYPE_DATE) {
T tmp_min_uint32_value = 0;
T tmp_min_uint32_value {};
memcpy((char*)(&tmp_min_uint32_value), statistic.first->cell_ptr(),
sizeof(uint24_t));
T tmp_max_uint32_value = 0;
T tmp_max_uint32_value {};
memcpy((char*)(&tmp_max_uint32_value), statistic.second->cell_ptr(),
sizeof(uint24_t));
return tmp_min_uint32_value > _max_value || tmp_max_uint32_value < _min_value;
} else {
return _get_zone_map_value<T>(statistic.first->cell_ptr()) > _max_value ||
_get_zone_map_value<T>(statistic.second->cell_ptr()) < _min_value;
return get_zone_map_value<Type, T>(statistic.first->cell_ptr()) > _max_value ||
get_zone_map_value<Type, T>(statistic.second->cell_ptr()) < _min_value;
}
} else {
return false;
Expand All @@ -395,6 +371,16 @@ class InListPredicateBase : public ColumnPredicate {
if (bf->test_bytes(reinterpret_cast<const char*>(value), sizeof(uint24_t))) {
return true;
}
} else if constexpr (Type == PrimitiveType::TYPE_DECIMALV2) {
// DecimalV2 using decimal12_t in bloom filter in storage layer,
// should convert value to decimal12_t
const T* value = (const T*)(iter->get_value());
decimal12_t decimal12_t_val(value->int_value(), value->frac_value());
if (bf->test_bytes(
const_cast<char*>(reinterpret_cast<const char*>(&decimal12_t_val)),
sizeof(decimal12_t))) {
return true;
}
} else {
const T* value = (const T*)(iter->get_value());
if (bf->test_bytes(reinterpret_cast<const char*>(value), sizeof(*value))) {
Expand Down Expand Up @@ -602,7 +588,7 @@ ColumnPredicate* _create_in_list_predicate(uint32_t column_id, const ConditionTy
const ConvertFunc& convert, bool is_opposite = false,
const TabletColumn* col = nullptr,
vectorized::Arena* arena = nullptr) {
using T = typename PredicatePrimitiveTypeTraits<Type>::PredicateFieldType;
using T = typename PrimitiveTypeTraits<Type>::CppType;
if constexpr (N >= 1 && N <= FIXED_CONTAINER_MAX_SIZE) {
using Set = std::conditional_t<
std::is_same_v<T, StringRef>, StringSet<FixedContainer<std::string, N>>,
Expand Down Expand Up @@ -660,7 +646,7 @@ template <PrimitiveType Type, PredicateType PT, size_t N = 0>
ColumnPredicate* _create_in_list_predicate(uint32_t column_id,
const std::shared_ptr<HybridSetBase>& hybrid_set,
size_t char_length = 0) {
using T = typename PredicatePrimitiveTypeTraits<Type>::PredicateFieldType;
using T = typename PrimitiveTypeTraits<Type>::CppType;
if constexpr (N >= 1 && N <= FIXED_CONTAINER_MAX_SIZE) {
using Set = std::conditional_t<
std::is_same_v<T, StringRef>, StringSet<FixedContainer<std::string, N>>,
Expand Down
10 changes: 6 additions & 4 deletions be/src/olap/predicate_creator.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class PredicateCreator {
template <PrimitiveType Type, PredicateType PT, typename ConditionType>
class IntegerPredicateCreator : public PredicateCreator<ConditionType> {
public:
using CppType = typename PredicatePrimitiveTypeTraits<Type>::PredicateFieldType;
using CppType = typename PrimitiveTypeTraits<Type>::CppType;
ColumnPredicate* create(const TabletColumn& column, int index, const ConditionType& conditions,
bool opposite, vectorized::Arena* arena) override {
if constexpr (PredicateTypeTraits::is_list(PT)) {
Expand Down Expand Up @@ -79,7 +79,7 @@ class IntegerPredicateCreator : public PredicateCreator<ConditionType> {
template <PrimitiveType Type, PredicateType PT, typename ConditionType>
class DecimalPredicateCreator : public PredicateCreator<ConditionType> {
public:
using CppType = typename PredicatePrimitiveTypeTraits<Type>::PredicateFieldType;
using CppType = typename PrimitiveTypeTraits<Type>::CppType;
ColumnPredicate* create(const TabletColumn& column, int index, const ConditionType& conditions,
bool opposite, vectorized::Arena* arena) override {
if constexpr (PredicateTypeTraits::is_list(PT)) {
Expand Down Expand Up @@ -135,7 +135,7 @@ class StringPredicateCreator : public PredicateCreator<ConditionType> {
template <PrimitiveType Type, PredicateType PT, typename ConditionType>
struct CustomPredicateCreator : public PredicateCreator<ConditionType> {
public:
using CppType = typename PredicatePrimitiveTypeTraits<Type>::PredicateFieldType;
using CppType = typename PrimitiveTypeTraits<Type>::CppType;
CustomPredicateCreator(const std::function<CppType(const std::string& condition)>& convert)
: _convert(convert) {}

Expand Down Expand Up @@ -183,7 +183,9 @@ std::unique_ptr<PredicateCreator<ConditionType>> get_creator(const FieldType& ty
[](const std::string& condition) {
decimal12_t value = {0, 0};
static_cast<void>(value.from_string(condition));
return value;
// Decimal12t is storage type, we need convert to compute type here to
// do comparisons
return DecimalV2Value(value.integer, value.fraction);
});
}
case FieldType::OLAP_FIELD_TYPE_DECIMAL32: {
Expand Down
Loading

0 comments on commit f088cab

Please sign in to comment.