Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 41 additions & 23 deletions be/src/vec/columns/column_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,30 @@ namespace doris::vectorized {

/// Full consistency check of the column layout. Active only when NDEBUG is not
/// defined; otherwise the body is compiled out.
///
/// Verifies, in order:
///   1. the constant-time invariants covered by sanity_check_simple()
///      (chars.size() equals the last offset, and offsets[-1] is 0);
///   2. that offsets never decrease from one row to the next (linear in row count).
///
/// Throws Exception wrapping Status::InternalError on the first violation found.
template <typename T>
void ColumnStr<T>::sanity_check() const {
#ifndef NDEBUG
    // The size/offset agreement checks live in sanity_check_simple(); repeating
    // them here would be dead code because that call throws first.
    sanity_check_simple();

    const auto count = offsets.size();
    for (size_t i = 0; i < count; ++i) {
        // For i == 0 this compares against offsets[-1], which the call above
        // has already verified to be 0.
        if (offsets[i] < offsets[i - 1]) {
            throw Exception(Status::InternalError("row count: {}, offsets[{}]: {}, offsets[{}]: {}",
                                                  count, i, offsets[i], i - 1, offsets[i - 1]));
        }
    }
#endif
}

/// Constant-time consistency check of the column layout. Active only when
/// NDEBUG is not defined; otherwise the body is compiled out.
///
/// Verifies that:
///   * chars.size() equals the offset stored for the last row;
///   * offsets[-1] is 0.
///
/// Throws Exception wrapping Status::InternalError on the first violation found.
template <typename T>
void ColumnStr<T>::sanity_check_simple() const {
#ifndef NDEBUG
    const auto count = offsets.size();
    // Signed index of the last row, so an empty column reports -1 in the message
    // instead of a wrapped-around unsigned value. For an empty column this reads
    // offsets[-1], the same element the second check below inspects.
    const auto last = static_cast<ssize_t>(count) - 1;
    if (chars.size() != offsets[last]) {
        // Four placeholders for four arguments: the index goes inside the
        // brackets and the offset value after the colon.
        throw Exception(Status::InternalError("row count: {}, chars.size(): {}, offset[{}]: {}",
                                              count, chars.size(), last, offsets[last]));
    }
    if (offsets[-1] != 0) {
        throw Exception(Status::InternalError("wrong offsets[-1]: {}", offsets[-1]));
    }
#endif
}

template <typename T>
Expand Down Expand Up @@ -147,6 +157,7 @@ void ColumnStr<T>::insert_range_from_ignore_overflow(const doris::vectorized::IC
src_concrete.offsets[start + i] - nested_offset + prev_max_offset;
}
}
sanity_check_simple();
}

template <typename T>
Expand Down Expand Up @@ -201,6 +212,7 @@ void ColumnStr<T>::insert_range_from(const IColumn& src, size_t start, size_t le
} else {
do_insert(assert_cast<const ColumnStr<uint32_t>&>(src));
}
sanity_check_simple();
}

template <typename T>
Expand All @@ -223,6 +235,7 @@ void ColumnStr<T>::insert_many_from(const IColumn& src, size_t position, size_t
offsets[start_pos] = static_cast<T>(prev_pos + data_length);
prev_pos = prev_pos + data_length;
}
sanity_check_simple();
}

template <typename T>
Expand Down Expand Up @@ -268,6 +281,7 @@ void ColumnStr<T>::insert_indices_from(const IColumn& src, const uint32_t* indic
} else {
do_insert(assert_cast<const ColumnStr<uint32_t>&>(src));
}
sanity_check_simple();
}

template <typename T>
Expand Down Expand Up @@ -308,6 +322,7 @@ ColumnPtr ColumnStr<T>::filter(const IColumn::Filter& filt, ssize_t result_size_

filter_arrays_impl<UInt8, IColumn::Offset>(chars, offsets, res_chars, res_offsets, filt,
result_size_hint);
sanity_check_simple();
return res;
} else {
throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
Expand All @@ -324,7 +339,9 @@ size_t ColumnStr<T>::filter(const IColumn::Filter& filter) {
}

if constexpr (std::is_same_v<UInt32, T>) {
return filter_arrays_impl<UInt8, IColumn::Offset>(chars, offsets, filter);
auto res = filter_arrays_impl<UInt8, IColumn::Offset>(chars, offsets, filter);
sanity_check();
return res;
} else {
throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
"should not call filter in ColumnStr<UInt64>");
Expand All @@ -344,6 +361,7 @@ Status ColumnStr<T>::filter_by_selector(const uint16_t* sel, size_t sel_size, IC
}
filter_arrays_impl<UInt8, IColumn::Offset>(chars, offsets, res_chars, res_offsets, filter,
sel_size);
sanity_check();
return Status::OK();
} else {
return Status::InternalError("should not call filter_by_selector in ColumnStr<UInt64>");
Expand Down Expand Up @@ -400,7 +418,7 @@ ColumnPtr ColumnStr<T>::permute(const IColumn::Permutation& perm, size_t limit)
current_new_offset += string_size;
res_offsets[i] = current_new_offset;
}

sanity_check();
return res;
}

Expand Down Expand Up @@ -432,6 +450,7 @@ const char* ColumnStr<T>::deserialize_and_insert_from_arena(const char* pos) {
memcpy(chars.data() + old_size, pos, string_size);

offsets.push_back(new_size);
sanity_check_simple();
return pos + string_size;
}

Expand Down Expand Up @@ -569,28 +588,25 @@ ColumnPtr ColumnStr<T>::replicate(const IColumn::Offsets& replicate_offsets) con
res_chars.reserve(chars.size() / col_size * replicate_offsets.back());
res_offsets.reserve(replicate_offsets.back());

T prev_replicate_offset = 0;
T prev_string_offset = 0;
T current_new_offset = 0;

for (size_t i = 0; i < col_size; ++i) {
T size_to_replicate = replicate_offsets[i] - prev_replicate_offset;
T string_size = offsets[i] - prev_string_offset;
size_t size_to_replicate = replicate_offsets[i] - replicate_offsets[i - 1];
T string_size = offsets[i] - offsets[i - 1];

check_chars_length(res_chars.size() + size_to_replicate * string_size,
res_offsets.size() + size_to_replicate, col_size);

res_chars.resize(res_chars.size() + size_to_replicate * string_size);
for (size_t j = 0; j < size_to_replicate; ++j) {
memcpy_small_allow_read_write_overflow15(&res_chars[current_new_offset],
&chars[offsets[i - 1]], string_size);
current_new_offset += string_size;
res_offsets.push_back(current_new_offset);

res_chars.resize(res_chars.size() + string_size);
memcpy_small_allow_read_write_overflow15(&res_chars[res_chars.size() - string_size],
&chars[prev_string_offset], string_size);
}

prev_replicate_offset = replicate_offsets[i];
prev_string_offset = offsets[i];
}

check_chars_length(res_chars.size(), res_offsets.size(), col_size);
sanity_check_simple();
return res;
}

Expand All @@ -605,9 +621,11 @@ void ColumnStr<T>::resize(size_t n) {
auto origin_size = size();
if (origin_size > n) {
offsets.resize(n);
chars.resize(offsets[n - 1]);
} else if (origin_size < n) {
insert_many_defaults(n - origin_size);
}
sanity_check_simple();
}

template <typename T>
Expand Down
14 changes: 14 additions & 0 deletions be/src/vec/columns/column_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
// Always reports true for this column type.
bool is_variable_length() const override { return true; }
// Debug-only layout checks; their bodies are compiled out when NDEBUG is defined.
// sanity_check() runs sanity_check_simple() and then verifies that offsets never
// decrease from one row to the next. Used in string unit tests and after
// operations such as filter and permute.
void sanity_check() const;
// Constant-time subset of sanity_check(): chars.size() must equal the last
// offset and offsets[-1] must be 0.
void sanity_check_simple() const;

// Returns the fixed name "String".
std::string get_name() const override { return "String"; }

// Returns the number of entries in offsets.
size_t size() const override { return offsets.size(); }
Expand Down Expand Up @@ -166,6 +168,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
chars.resize(new_size);
memcpy(chars.data() + old_size, s.data, size_to_append);
offsets.push_back(new_size);
sanity_check_simple();
}

void insert_many_from(const IColumn& src, size_t position, size_t length) override;
Expand All @@ -192,6 +195,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
size_to_append);
offsets.push_back(new_size);
}
sanity_check_simple();
}

void insert_data(const char* pos, size_t length) override {
Expand All @@ -204,6 +208,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
memcpy(chars.data() + old_size, pos, length);
}
offsets.push_back(new_size);
sanity_check_simple();
}

void insert_data_without_reserve(const char* pos, size_t length) {
Expand All @@ -216,6 +221,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
memcpy(chars.data() + old_size, pos, length);
}
offsets.push_back_without_reserve(new_size);
sanity_check_simple();
}

/// Before insert strings, the caller should calculate the total size of strings,
Expand Down Expand Up @@ -249,6 +255,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
}
check_chars_length(offset, offsets.size());
chars.resize(offset);
sanity_check_simple();
}

void insert_many_continuous_binary_data(const char* data, const uint32_t* offsets_,
Expand All @@ -274,6 +281,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
offsets_ptr[i] = tail_offset + offsets_[i + 1] - begin_offset;
}
DCHECK(chars.size() == offsets.back());
sanity_check_simple();
}

void insert_many_strings(const StringRef* strings, size_t num) override {
Expand All @@ -296,6 +304,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
}
offsets.push_back(offset);
}
sanity_check_simple();
}

template <size_t copy_length>
Expand All @@ -320,6 +329,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
offsets.push_back(offset);
}
chars.resize(old_size + new_size);
sanity_check_simple();
}

void insert_many_strings_overflow(const StringRef* strings, size_t num,
Expand All @@ -337,6 +347,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
} else {
insert_many_strings(strings, num);
}
sanity_check_simple();
}

void insert_many_dict_data(const int32_t* data_array, size_t start_index, const StringRef* dict,
Expand Down Expand Up @@ -367,12 +378,14 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
memcpy(chars.data() + old_size, src.data, src.size);
old_size += src.size;
}
sanity_check_simple();
}

// Removes the last n entries from the column: shrinks chars by the number of
// bytes those entries occupy, then shrinks offsets by n.
void pop_back(size_t n) override {
    const size_t rows_kept = offsets.size() - n;
    // Bytes to drop = last offset minus offset_at(rows_kept)
    // (presumably the start of the first removed row — confirm against offset_at).
    const size_t bytes_dropped = offsets.back() - offset_at(rows_kept);
    const size_t bytes_kept = chars.size() - bytes_dropped;
    chars.resize(bytes_kept);
    offsets.resize_assume_reserved(rows_kept);
    sanity_check_simple();
}

StringRef serialize_value_into_arena(size_t n, Arena& arena, char const*& begin) const override;
Expand Down Expand Up @@ -489,6 +502,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {

// Appends `length` entries without adding any bytes to chars: every new offset
// is set to the current chars.size().
void insert_many_defaults(size_t length) override {
    const auto data_end = static_cast<T>(chars.size());
    const size_t row_total = offsets.size() + length;
    offsets.resize_fill(row_total, data_end);
    sanity_check_simple();
}

int compare_at(size_t n, size_t m, const IColumn& rhs_,
Expand Down
1 change: 1 addition & 0 deletions be/src/vec/functions/function_decode_varchar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ class FunctionDecodeAsVarchar : public IFunction {
simd::reverse_copy_bytes(col_res_data.data() + col_res_offset[i - 1], str_size,
ui8_ptr + sizeof(IntegerType) - str_size, str_size);
}
col_res_data.resize(col_res_offset[col_res_offset.size() - 1]);

block.get_by_position(result).column = std::move(col_res);

Expand Down
3 changes: 3 additions & 0 deletions be/src/vec/functions/function_ip.h
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ class FunctionIPv6NumToString : public IFunction {
process_ipv6_column<ColumnString>(column, input_rows_count, vec_res, offsets_res,
null_map, ipv6_address_data);
}
vec_res.resize(offsets_res[offsets_res.size() - 1]);

block.replace_by_position(result,
ColumnNullable::create(std::move(col_res), std::move(null_map)));
Expand Down Expand Up @@ -1319,6 +1320,8 @@ class FunctionCutIPv6 : public IFunction {
offsets_res[i] = cast_set<uint32_t>(pos - begin);
}

chars_res.resize(offsets_res[offsets_res.size() - 1]);

block.replace_by_position(result, std::move(col_res));
return Status::OK();
}
Expand Down
1 change: 1 addition & 0 deletions be/src/vec/functions/function_uuid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ class FunctionInttoUuid : public IFunction {
col_offset[row] = col_offset[row - 1] + str_length;
deserialize((char*)arg, col_data.data() + str_length * row);
}
col_data.resize(str_length * input_rows_count);
block.replace_by_position(result, std::move(result_column));
return Status::OK();
}
Expand Down
Loading