Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 44 additions & 12 deletions be/src/vec/columns/column_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,20 +36,30 @@ namespace doris::vectorized {

template <typename T>
void ColumnStr<T>::sanity_check() const {
auto count = offsets.size();
#ifndef NDEBUG
sanity_check_simple();
auto count = (int)offsets.size();
for (int i = 0; i < count; ++i) {
if (offsets[i] < offsets[i - 1]) {
throw Exception(Status::InternalError("row count: {}, offsets[{}]: {}, offsets[{}]: {}",
count, i, offsets[i], i - 1, offsets[i - 1]));
}
}
#endif
}

template <typename T>
void ColumnStr<T>::sanity_check_simple() const {
#ifndef NDEBUG
auto count = (int)offsets.size();
if (chars.size() != offsets[count - 1]) {
LOG(FATAL) << "row count: " << count << ", chars.size(): " << chars.size() << ", offset["
<< count - 1 << "]: " << offsets[count - 1];
throw Exception(Status::InternalError("row count: {}, chars.size(): {}, offset[{}]: {}",
count, chars.size(), count - 1, offsets[count - 1]));
}
if (offsets[-1] != 0) {
LOG(FATAL) << "wrong offsets[-1]: " << offsets[-1];
}
for (size_t i = 0; i < count; ++i) {
if (offsets[i] < offsets[i - 1]) {
LOG(FATAL) << "row count: " << count << ", offsets[" << i << "]: " << offsets[i]
<< ", offsets[" << i - 1 << "]: " << offsets[i - 1];
}
throw Exception(Status::InternalError("wrong offsets[-1]: {}", offsets[-1]));
}
#endif
}

template <typename T>
Expand Down Expand Up @@ -77,6 +87,8 @@ MutableColumnPtr ColumnStr<T>::clone_resized(size_t to_size) const {
res->offsets.resize_fill(to_size, chars.size());
}

res->sanity_check_simple();

return res;
}

Expand Down Expand Up @@ -135,6 +147,14 @@ void ColumnStr<T>::insert_range_from_ignore_overflow(const doris::vectorized::IC
src_concrete.offsets[start + i] - nested_offset + prev_max_offset;
}
}
#ifndef NDEBUG
auto count = int64_t(offsets.size());
// offsets may overflow, so we make chars.size() as T to do same overflow check
if (offsets.back() != T(chars.size())) {
throw Exception(Status::InternalError("row count: {}, chars.size(): {}, offset[{}]: {}",
count, chars.size(), count - 1, offsets[count - 1]));
}
#endif
}

template <typename T>
Expand Down Expand Up @@ -178,6 +198,7 @@ void ColumnStr<T>::insert_range_from(const IColumn& src, size_t start, size_t le
} else {
do_insert(assert_cast<const ColumnStr<uint32_t>&>(src));
}
sanity_check_simple();
}

template <typename T>
Expand Down Expand Up @@ -244,6 +265,7 @@ void ColumnStr<T>::insert_indices_from(const IColumn& src, const uint32_t* indic
} else {
do_insert(assert_cast<const ColumnStr<uint32_t>&>(src));
}
sanity_check_simple();
}

template <typename T>
Expand Down Expand Up @@ -297,7 +319,9 @@ size_t ColumnStr<T>::filter(const IColumn::Filter& filter) {
}

if constexpr (std::is_same_v<UInt32, T>) {
return filter_arrays_impl<UInt8, IColumn::Offset>(chars, offsets, filter);
auto res = filter_arrays_impl<UInt8, IColumn::Offset>(chars, offsets, filter);
sanity_check_simple();
return res;
} else {
throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
"should not call filter in ColumnStr<UInt64>");
Expand Down Expand Up @@ -373,7 +397,7 @@ ColumnPtr ColumnStr<T>::permute(const IColumn::Permutation& perm, size_t limit)
current_new_offset += string_size;
res_offsets[i] = current_new_offset;
}

sanity_check_simple();
return res;
}

Expand Down Expand Up @@ -405,6 +429,7 @@ const char* ColumnStr<T>::deserialize_and_insert_from_arena(const char* pos) {
memcpy(chars.data() + old_size, pos, string_size);

offsets.push_back(new_size);
sanity_check_simple();
return pos + string_size;
}

Expand Down Expand Up @@ -495,6 +520,7 @@ void ColumnStr<T>::deserialize_vec_with_null_map(std::vector<StringRef>& keys,
insert_default();
}
}
sanity_check_simple();
}

template <typename T>
Expand Down Expand Up @@ -558,30 +584,35 @@ ColumnPtr ColumnStr<T>::replicate(const IColumn::Offsets& replicate_offsets) con
res_chars.resize(res_chars.size() + string_size);
memcpy_small_allow_read_write_overflow15(&res_chars[res_chars.size() - string_size],
&chars[prev_string_offset], string_size);
check_chars_length(res_chars.size(), res_offsets.size());
}

prev_replicate_offset = replicate_offsets[i];
prev_string_offset = offsets[i];
}

check_chars_length(res_chars.size(), res_offsets.size());
sanity_check_simple();
return res;
}

template <typename T>
void ColumnStr<T>::reserve(size_t n) {
offsets.reserve(n);
chars.reserve(n);
sanity_check_simple();
}

template <typename T>
void ColumnStr<T>::resize(size_t n) {
auto origin_size = size();
if (origin_size > n) {
offsets.resize(n);
chars.resize(offsets[n - 1]);
} else if (origin_size < n) {
insert_many_defaults(n - origin_size);
}
sanity_check_simple();
}

template <typename T>
Expand Down Expand Up @@ -637,6 +668,7 @@ ColumnPtr ColumnStr<T>::convert_column_if_overflow() {
}

offsets.clear();
new_col->sanity_check_simple();
return new_col;
}
return this->get_ptr();
Expand Down
12 changes: 12 additions & 0 deletions be/src/vec/columns/column_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
bool is_variable_length() const override { return true; }
// used in string ut testd
void sanity_check() const;
void sanity_check_simple() const;
const char* get_family_name() const override { return "String"; }

size_t size() const override { return offsets.size(); }
Expand Down Expand Up @@ -162,6 +163,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
chars.resize(new_size);
memcpy(chars.data() + old_size, s.data, size_to_append);
offsets.push_back(new_size);
sanity_check_simple();
}

void insert_many_from(const IColumn& src, size_t position, size_t length) override;
Expand All @@ -188,6 +190,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
size_to_append);
offsets.push_back(new_size);
}
sanity_check_simple();
}

void insert_data(const char* pos, size_t length) override {
Expand All @@ -200,6 +203,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
memcpy(chars.data() + old_size, pos, length);
}
offsets.push_back(new_size);
sanity_check_simple();
}

void insert_data_without_reserve(const char* pos, size_t length) {
Expand All @@ -212,6 +216,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
memcpy(chars.data() + old_size, pos, length);
}
offsets.push_back_without_reserve(new_size);
sanity_check_simple();
}

/// Before insert strings, the caller should calculate the total size of strings,
Expand Down Expand Up @@ -245,6 +250,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
}
check_chars_length(offset, offsets.size());
chars.resize(offset);
sanity_check_simple();
}

void insert_many_continuous_binary_data(const char* data, const uint32_t* offsets_,
Expand All @@ -270,6 +276,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
offsets_ptr[i] = tail_offset + offsets_[i + 1] - begin_offset;
}
DCHECK(chars.size() == offsets.back());
sanity_check_simple();
}

void insert_many_binary_data(char* data_array, uint32_t* len_array,
Expand All @@ -293,6 +300,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
offset += len;
offsets.push_back(offset);
}
sanity_check_simple();
}

void insert_many_strings(const StringRef* strings, size_t num) override {
Expand All @@ -315,6 +323,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
}
offsets.push_back(offset);
}
sanity_check_simple();
}

template <size_t copy_length>
Expand All @@ -339,6 +348,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
offsets.push_back(offset);
}
chars.resize(old_size + new_size);
sanity_check_simple();
}

void insert_many_strings_overflow(const StringRef* strings, size_t num,
Expand Down Expand Up @@ -380,12 +390,14 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
memcpy(chars.data() + old_size, src.data, src.size);
old_size += src.size;
}
sanity_check_simple();
}

void pop_back(size_t n) override {
size_t nested_n = offsets.back() - offset_at(offsets.size() - n);
chars.resize(chars.size() - nested_n);
offsets.resize_assume_reserved(offsets.size() - n);
sanity_check_simple();
}

StringRef serialize_value_into_arena(size_t n, Arena& arena, char const*& begin) const override;
Expand Down
2 changes: 2 additions & 0 deletions be/src/vec/functions/date_time_transforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ struct TransformerToStringOneArgument {
res_offsets[i] = Transform::execute(date_time_value, res_data, offset);
null_map[i] = !date_time_value.is_valid_date();
}
res_data.resize(res_offsets[res_offsets.size() - 1]);
}

static void vector(FunctionContext* context,
Expand All @@ -334,6 +335,7 @@ struct TransformerToStringOneArgument {
res_offsets[i] = Transform::execute(date_time_value, res_data, offset);
DCHECK(date_time_value.is_valid_date());
}
res_data.resize(res_offsets[res_offsets.size() - 1]);
}
};

Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/functions/function_decode_varchar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ class FunctionDecodeAsVarchar : public IFunction {
simd::reverse_copy_bytes(col_res_data.data() + col_res_offset[i - 1], str_size,
ui8_ptr + sizeof(IntegerType) - str_size, str_size);
}

col_res_data.resize(col_res_offset[col_res_offset.size() - 1]);
block.get_by_position(result).column = std::move(col_res);

return Status::OK();
Expand Down
2 changes: 2 additions & 0 deletions be/src/vec/functions/function_ip.h
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ class FunctionIPv6NumToString : public IFunction {
process_ipv6_column<ColumnString>(column, input_rows_count, vec_res, offsets_res,
null_map, ipv6_address_data);
}
vec_res.resize(offsets_res[offsets_res.size() - 1]);

block.replace_by_position(result,
ColumnNullable::create(std::move(col_res), std::move(null_map)));
Expand Down Expand Up @@ -1388,6 +1389,7 @@ class FunctionCutIPv6 : public IFunction {
cut_address(address, pos, bytes_to_cut_count);
offsets_res[i] = pos - begin;
}
chars_res.resize(offsets_res[offsets_res.size() - 1]);

block.replace_by_position(result, std::move(col_res));
return Status::OK();
Expand Down
1 change: 1 addition & 0 deletions be/src/vec/functions/function_uuid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ class FunctionInttoUuid : public IFunction {
col_offset[row] = col_offset[row - 1] + str_length;
deserialize((char*)arg, col_data.data() + str_length * row);
}
col_data.resize(str_length * input_rows_count);
block.replace_by_position(result, std::move(result_column));
return Status::OK();
}
Expand Down
Loading