Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions be/src/agent/task_worker_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,9 @@ void TaskWorkerPool::_alter_tablet(TaskWorkerPool* worker_pool_this,
&error_msgs, process_name);
OLAPStatus sc_status = worker_pool_this->_env->storage_engine()->execute_task(&engine_task);
if (sc_status != OLAP_SUCCESS) {
if (sc_status == OLAP_ERR_DATA_QUALITY_ERR) {
error_msgs.push_back("The data quality does not satisfy, please check your data. ");
}
status = DORIS_ERROR;
} else {
status = DORIS_SUCCESS;
Expand Down
2 changes: 2 additions & 0 deletions be/src/olap/olap_define.h
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,8 @@ enum OLAPStatus {
OLAP_ERR_PREVIOUS_SCHEMA_CHANGE_NOT_FINISHED = -1603,
OLAP_ERR_SCHEMA_CHANGE_INFO_INVALID = -1604,
OLAP_ERR_QUERY_SPLIT_KEY_ERR = -1605,
//Error caused by a data quality issue during schema change/materialized view
OLAP_ERR_DATA_QUALITY_ERR = -1606,

// Column File
// [-1700, -1800)
Expand Down
102 changes: 47 additions & 55 deletions be/src/olap/schema_change.cpp

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions be/src/olap/schema_change.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class RowBlockChanger {

SchemaMapping get_schema_mapping() const { return _schema_mapping; }

bool change_row_block(const RowBlock* ref_block, int32_t data_version, RowBlock* mutable_block,
OLAPStatus change_row_block(const RowBlock* ref_block, int32_t data_version, RowBlock* mutable_block,
uint64_t* filtered_rows) const;

private:
Expand Down Expand Up @@ -92,7 +92,7 @@ class SchemaChange {
SchemaChange() : _filtered_rows(0), _merged_rows(0) {}
virtual ~SchemaChange() {}

virtual bool process(RowsetReaderSharedPtr rowset_reader, RowsetWriter* new_rowset_builder,
virtual OLAPStatus process(RowsetReaderSharedPtr rowset_reader, RowsetWriter* new_rowset_builder,
TabletSharedPtr tablet, TabletSharedPtr base_tablet) = 0;

void add_filtered_rows(uint64_t filtered_rows) { _filtered_rows += filtered_rows; }
Expand All @@ -118,7 +118,7 @@ class LinkedSchemaChange : public SchemaChange {
: _row_block_changer(row_block_changer) {}
~LinkedSchemaChange() {}

bool process(RowsetReaderSharedPtr rowset_reader, RowsetWriter* new_rowset_writer,
virtual OLAPStatus process(RowsetReaderSharedPtr rowset_reader, RowsetWriter* new_rowset_writer,
TabletSharedPtr new_tablet, TabletSharedPtr base_tablet) override;

private:
Expand All @@ -134,7 +134,7 @@ class SchemaChangeDirectly : public SchemaChange {
explicit SchemaChangeDirectly(const RowBlockChanger& row_block_changer);
virtual ~SchemaChangeDirectly();

virtual bool process(RowsetReaderSharedPtr rowset_reader, RowsetWriter* new_rowset_writer,
virtual OLAPStatus process(RowsetReaderSharedPtr rowset_reader, RowsetWriter* new_rowset_writer,
TabletSharedPtr new_tablet, TabletSharedPtr base_tablet) override;

private:
Expand All @@ -154,7 +154,7 @@ class SchemaChangeWithSorting : public SchemaChange {
size_t memory_limitation);
virtual ~SchemaChangeWithSorting();

virtual bool process(RowsetReaderSharedPtr rowset_reader, RowsetWriter* new_rowset_builder,
virtual OLAPStatus process(RowsetReaderSharedPtr rowset_reader, RowsetWriter* new_rowset_builder,
TabletSharedPtr new_tablet, TabletSharedPtr base_tablet) override;

private:
Expand Down
81 changes: 80 additions & 1 deletion be/test/olap/schema_change_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -781,7 +781,6 @@ TEST_F(TestColumn, ConvertIntToBitmap) {
column_mapping->ref_column = 2;
column_mapping->materialized_function = "to_bitmap";


RowBlock mutable_block(&mv_tablet_schema);
mutable_block.init(block_info);
uint64_t filtered_rows = 0;
Expand Down Expand Up @@ -878,6 +877,86 @@ TEST_F(TestColumn, ConvertCharToHLL) {
HyperLogLog hll(*dst_slice);
ASSERT_EQ(hll.estimate_cardinality(), 1);
}

// Verifies that RowBlockChanger applies the "count_field" materialized
// function when converting a base-tablet row block into a materialized-view
// row block: a single base row is written, converted, and the resulting
// BIGINT value column of the first output row is expected to equal 1.
TEST_F(TestColumn, ConvertCharToCount) {
    // Base tablet schema (layout is defined by the fixture helper).
    TabletSchema tablet_schema;
    CreateTabletSchema(tablet_schema);

    // Base row block, backed by the fixture's column writer.
    CreateColumnWriter(tablet_schema);

    RowBlock block(&tablet_schema);
    RowBlockInfo block_info;
    block_info.row_num = 10000;
    block.init(block_info);

    // Build one source row from string literals and store it at index 0.
    RowCursor write_row;
    write_row.init(tablet_schema);
    write_row.allocate_memory_for_string_type(tablet_schema);
    std::vector<std::string> val_string_array;
    val_string_array.emplace_back("1");
    val_string_array.emplace_back("1");
    val_string_array.emplace_back("2");
    val_string_array.emplace_back("3");
    OlapTuple tuple(val_string_array);
    write_row.from_tuple(tuple);
    block.set_row(0, write_row);

    // Only the first row of the block carries data.
    block.finalize(1);
    ColumnDataHeaderMessage header;
    ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);

    // Materialized-view tablet schema: one INT key column (k1) and one
    // BIGINT value column (v1) aggregated with SUM.
    TabletSchemaPB mv_tablet_schema_pb;
    mv_tablet_schema_pb.set_keys_type(KeysType::AGG_KEYS);
    mv_tablet_schema_pb.set_num_short_key_columns(2);
    mv_tablet_schema_pb.set_num_rows_per_row_block(1024);
    mv_tablet_schema_pb.set_compress_kind(COMPRESS_NONE);
    mv_tablet_schema_pb.set_next_column_unique_id(3);

    ColumnPB* mv_column_1 = mv_tablet_schema_pb.add_column();
    mv_column_1->set_unique_id(1);
    mv_column_1->set_name("k1");
    mv_column_1->set_type("INT");
    mv_column_1->set_is_key(true);
    mv_column_1->set_length(4);
    mv_column_1->set_index_length(4);
    mv_column_1->set_is_nullable(false);
    mv_column_1->set_is_bf_column(false);

    ColumnPB* mv_column_2 = mv_tablet_schema_pb.add_column();
    mv_column_2->set_unique_id(2);
    mv_column_2->set_name("v1");
    mv_column_2->set_type("BIGINT");
    // NOTE(review): length 4 is kept from the original test even though the
    // result is read back as int64_t below - confirm whether 8 is intended.
    mv_column_2->set_length(4);
    mv_column_2->set_is_key(false);
    mv_column_2->set_is_nullable(false);
    mv_column_2->set_is_bf_column(false);
    mv_column_2->set_aggregation("SUM");

    TabletSchema mv_tablet_schema;
    mv_tablet_schema.init_from_pb(mv_tablet_schema_pb);

    // Map MV column 0 straight from base column 0, and MV column 1 from base
    // column 1 through the "count_field" materialized function.
    RowBlockChanger row_block_changer(mv_tablet_schema);
    ColumnMapping* column_mapping = row_block_changer.get_mutable_column_mapping(0);
    column_mapping->ref_column = 0;
    column_mapping = row_block_changer.get_mutable_column_mapping(1);
    column_mapping->ref_column = 1;
    column_mapping->materialized_function = "count_field";

    RowBlock mutable_block(&mv_tablet_schema);
    mutable_block.init(block_info);
    uint64_t filtered_rows = 0;
    // change_row_block reports failures through its OLAPStatus return value;
    // assert on it so a failed conversion is caught here instead of surfacing
    // as a confusing mismatch in the cell comparison below.
    ASSERT_EQ(row_block_changer.change_row_block(&block, 0, &mutable_block, &filtered_rows),
              OLAP_SUCCESS);

    RowCursor mv_row_cursor;
    mv_row_cursor.init(mv_tablet_schema);
    mutable_block.get_row(0, &mv_row_cursor);

    // The converted value column of the first row must hold a count of 1.
    auto dst = mv_row_cursor.cell_ptr(1);
    ASSERT_EQ(*reinterpret_cast<const int64_t*>(dst), 1);
}
}

int main(int argc, char** argv) {
Expand Down
5 changes: 5 additions & 0 deletions docs/en/administrator-guide/materialized_view.md
Original file line number Diff line number Diff line change
Expand Up @@ -483,3 +483,8 @@ This problem can be solved by creating a materialized view with k3 as the first
2. If the conditional column of the delete statement does not exist in the materialized view, the delete operation cannot be performed. If you must delete data, you need to delete the materialized view before deleting the data.
3. Too many materialized views on a single table will affect the efficiency of importing: When importing data, the materialized view and base table data are updated synchronously. If a table has more than 10 materialized views, the import speed may become very slow. This is equivalent to a single import having to load data into 10 tables at the same time.
4. The same column with different aggregate functions cannot appear in a materialized view at the same time. For example, select sum(a), min(a) from table is not supported.

## Error
1. DATA_QUALITY_ERR: "The data quality does not satisfy, please check your data"
Materialized view creation failed due to data quality issues.
Note: The bitmap type only supports positive integers. If there are negative numbers in the original data, the materialized view will fail to be created.
5 changes: 5 additions & 0 deletions docs/zh-CN/administrator-guide/materialized_view.md
Original file line number Diff line number Diff line change
Expand Up @@ -485,3 +485,8 @@ MySQL [test]> desc advertiser_view_record;
2. 如果删除语句的条件列,在物化视图中不存在,则不能进行删除操作。如果一定要删除数据,则需要先将物化视图删除,然后方可删除数据。
3. 单表上过多的物化视图会影响导入的效率:导入数据时,物化视图和 base 表数据是同步更新的,如果一张表的物化视图表超过10张,则有可能导致导入速度很慢。这就像单次导入需要同时导入10张表数据是一样的。
4. 相同列,不同聚合函数,不能同时出现在一张物化视图中,比如:select sum(a), min(a) from table 不支持。

## 异常错误
1. DATA_QUALITY_ERR: "The data quality does not satisfy, please check your data"
由于数据质量问题导致物化视图创建失败。
注意:bitmap类型仅支持正整型, 如果原始数据中存在负数,会导致物化视图创建失败
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ public TMasterResult finishTask(TFinishTaskRequest request) {
if (taskStatus.getStatus_code() != TStatusCode.OK) {
task.failed();
String errMsg = "task type: " + taskType + ", status_code: " + taskStatus.getStatus_code().toString() +
(taskStatus.isSetError_msgs() ? (", status_message: " + taskStatus.getError_msgs()) : "") +
", backendId: " + backend + ", signature: " + signature;
task.setErrorMsg(errMsg);
// We start to let FE perceive the task's error msg
Expand Down