Skip to content

Commit

Permalink
[fix](inverted index) multi match distinguishes the inverted index v1…
Browse files Browse the repository at this point in the history
… and v2 (apache#39149)

## Proposed changes

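1. Inverted index V1 and V2 use different column names (V1 is queried by the column name, V2 by the column's unique id), so multi_match must pick the name that matches the index storage format.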
  • Loading branch information
zzzxl1993 authored and wyxxxcat committed Aug 14, 2024
1 parent 2172916 commit a653945
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 48 deletions.
12 changes: 9 additions & 3 deletions be/src/vec/functions/function_multi_match.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,15 @@ Status FunctionMultiMatch::eval_inverted_index(FunctionContext* context,

auto single_result = std::make_shared<roaring::Roaring>();
StringRef query_value(match_param->query.data());
RETURN_IF_ERROR(index_reader->query(opts.stats, opts.runtime_state,
std::to_string(column.unique_id()), &query_value,
query_type, single_result));
auto index_version = tablet_schema->get_inverted_index_storage_format();
if (index_version == InvertedIndexStorageFormatPB::V1) {
RETURN_IF_ERROR(index_reader->query(opts.stats, opts.runtime_state, column_name,
&query_value, query_type, single_result));
} else if (index_version == InvertedIndexStorageFormatPB::V2) {
RETURN_IF_ERROR(index_reader->query(opts.stats, opts.runtime_state,
std::to_string(column.unique_id()), &query_value,
query_type, single_result));
}
(*result) |= (*single_result);
}

Expand Down
24 changes: 24 additions & 0 deletions regression-test/data/inverted_index_p0/test_index_multi_match.out
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,27 @@
-- !sql --
44

-- !sql --
178

-- !sql --
180

-- !sql --
859

-- !sql --
44

-- !sql --
178

-- !sql --
180

-- !sql --
859

-- !sql --
44

Original file line number Diff line number Diff line change
Expand Up @@ -19,51 +19,37 @@
suite("test_index_multi_match", "p0"){
def indexTbName1 = "test_index_multi_match_1"
def indexTbName2 = "test_index_multi_match_2"
def indexTbName3 = "test_index_multi_match_3"
def indexTbName4 = "test_index_multi_match_4"

sql "DROP TABLE IF EXISTS ${indexTbName1}"
sql "DROP TABLE IF EXISTS ${indexTbName2}"
sql "DROP TABLE IF EXISTS ${indexTbName3}"
sql "DROP TABLE IF EXISTS ${indexTbName4}"

sql """
CREATE TABLE ${indexTbName1} (
`@timestamp` int(11) NULL COMMENT "",
`clientip` text NULL COMMENT "",
`request` text NULL COMMENT "",
`status` text NULL COMMENT "",
`size` text NULL COMMENT "",
INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
INDEX status_idx (`status`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
INDEX size_idx (`size`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT ''
) ENGINE=OLAP
DUPLICATE KEY(`@timestamp`)
COMMENT "OLAP"
DISTRIBUTED BY RANDOM BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"disable_auto_compaction" = "true"
);
"""

sql """
CREATE TABLE ${indexTbName2} (
`@timestamp` int(11) NULL COMMENT "",
`clientip` text NULL COMMENT "",
`request` text NULL COMMENT "",
`status` text NULL COMMENT "",
`size` text NULL COMMENT "",
INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
INDEX status_idx (`status`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
INDEX size_idx (`size`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT ''
) ENGINE=OLAP
DUPLICATE KEY(`@timestamp`)
COMMENT "OLAP"
DISTRIBUTED BY RANDOM BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"disable_auto_compaction" = "true"
);
"""
// Creates a test table named `table_name` with an int `@timestamp` key and four
// text columns (clientip, request, status, size). Each text column gets an
// inverted index using the "english" parser with "support_phrase" enabled.
//
// table_name  - name of the table to create.
// idx_version - value interpolated into the "inverted_index_storage_format"
//               table property; callers in this suite pass 'V1' or 'V2' so the
//               same queries can be run against both index storage formats.
def create_table = {table_name, idx_version ->
sql """
CREATE TABLE ${table_name} (
`@timestamp` int(11) NULL COMMENT "",
`clientip` text NULL COMMENT "",
`request` text NULL COMMENT "",
`status` text NULL COMMENT "",
`size` text NULL COMMENT "",
INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
INDEX status_idx (`status`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
INDEX size_idx (`size`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT ''
) ENGINE=OLAP
DUPLICATE KEY(`@timestamp`)
COMMENT "OLAP"
DISTRIBUTED BY RANDOM BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"inverted_index_storage_format" = "${idx_version}",
"disable_auto_compaction" = "true"
);
"""
}

def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
expected_succ_rows = -1, load_to_single_tablet = 'true' ->
Expand Down Expand Up @@ -103,20 +89,39 @@ suite("test_index_multi_match", "p0"){
}

try {
create_table(indexTbName1, 'V1')
create_table(indexTbName2, 'V2')
create_table(indexTbName3, 'V1')
create_table(indexTbName4, 'V2')

load_httplogs_data.call(indexTbName1, 'test_index_multi_match_1', 'true', 'json', 'documents-1000.json')
load_httplogs_data.call(indexTbName2, 'test_index_multi_match_2', 'true', 'json', 'documents-1000.json')
load_httplogs_data.call(indexTbName3, 'test_index_multi_match_3', 'true', 'json', 'documents-1000.json')
load_httplogs_data.call(indexTbName4, 'test_index_multi_match_4', 'true', 'json', 'documents-1000.json')

sql "sync"

sql """ set enable_common_expr_pushdown = true """

qt_sql """ select count() from ${indexTbName1} where (clientip match_phrase_prefix '2'); """
qt_sql """ select count() from ${indexTbName1} where (clientip match_phrase_prefix '2' or request match_phrase_prefix '2'); """
qt_sql """ select count() from ${indexTbName1} where (clientip match_phrase_prefix '2' or request match_phrase_prefix '2' or status match_phrase_prefix '2' or size match_phrase_prefix '2'); """
qt_sql """ select count() from ${indexTbName1} where (clientip match_phrase_prefix 'a' or request match_phrase_prefix 'a' or status match_phrase_prefix 'a' or size match_phrase_prefix 'a'); """

qt_sql """ select count() from ${indexTbName2} where multi_match(clientip, '', 'phrase_prefix', '2'); """
qt_sql """ select count() from ${indexTbName2} where multi_match(clientip, 'request', 'phrase_prefix', '2'); """
qt_sql """ select count() from ${indexTbName2} where multi_match(clientip, 'request, status, size', 'phrase_prefix', '2'); """
qt_sql """ select count() from ${indexTbName2} where multi_match(clientip, 'request, status, size', 'phrase_prefix', 'a'); """
qt_sql """ select count() from ${indexTbName2} where (clientip match_phrase_prefix '2'); """
qt_sql """ select count() from ${indexTbName2} where (clientip match_phrase_prefix '2' or request match_phrase_prefix '2'); """
qt_sql """ select count() from ${indexTbName2} where (clientip match_phrase_prefix '2' or request match_phrase_prefix '2' or status match_phrase_prefix '2' or size match_phrase_prefix '2'); """
qt_sql """ select count() from ${indexTbName2} where (clientip match_phrase_prefix 'a' or request match_phrase_prefix 'a' or status match_phrase_prefix 'a' or size match_phrase_prefix 'a'); """

qt_sql """ select count() from ${indexTbName3} where multi_match(clientip, '', 'phrase_prefix', '2'); """
qt_sql """ select count() from ${indexTbName3} where multi_match(clientip, 'request', 'phrase_prefix', '2'); """
qt_sql """ select count() from ${indexTbName3} where multi_match(clientip, 'request, status, size', 'phrase_prefix', '2'); """
qt_sql """ select count() from ${indexTbName3} where multi_match(clientip, 'request, status, size', 'phrase_prefix', 'a'); """

qt_sql """ select count() from ${indexTbName4} where multi_match(clientip, '', 'phrase_prefix', '2'); """
qt_sql """ select count() from ${indexTbName4} where multi_match(clientip, 'request', 'phrase_prefix', '2'); """
qt_sql """ select count() from ${indexTbName4} where multi_match(clientip, 'request, status, size', 'phrase_prefix', '2'); """
qt_sql """ select count() from ${indexTbName4} where multi_match(clientip, 'request, status, size', 'phrase_prefix', 'a'); """

} finally {
//try_sql("DROP TABLE IF EXISTS ${testTable}")
Expand Down

0 comments on commit a653945

Please sign in to comment.