Skip to content

Commit 4d2e7ba

Browse files
authored
DataShard fulltext index build scan (#25028)
1 parent 4401bf9 commit 4d2e7ba

File tree

16 files changed

+929
-56
lines changed

16 files changed

+929
-56
lines changed

ydb/core/base/fulltext.cpp

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -137,27 +137,40 @@ TVector<TString> Analyze(const TString& text, const Ydb::Table::FulltextIndexSet
137137
return tokens;
138138
}
139139

140-
bool ValidateSettings(const Ydb::Table::FulltextIndexSettings& settings, TString& error) {
140+
bool ValidateSettings(const NProtoBuf::RepeatedPtrField<TString>& keyColumns, const Ydb::Table::FulltextIndexSettings& settings, TString& error) {
141141
if (!settings.has_layout() || settings.layout() == Ydb::Table::FulltextIndexSettings::LAYOUT_UNSPECIFIED) {
142142
error = "layout should be set";
143143
return false;
144144
}
145145

146-
if (settings.columns().size() != 1) {
147-
error = TStringBuilder() << "fulltext index should have single column settings"
148-
<< " but have " << settings.columns().size() << " of them";
149-
return false;
150-
}
151-
152146
for (auto column : settings.columns()) {
153147
if (!column.has_column()) {
154-
error = "column should be set";
148+
error = "fulltext index settings should have a column name";
155149
return false;
156150
}
157151
if (!column.has_analyzers()) {
158-
error = "column analyzers should be set";
152+
error = "fulltext index settings should have analyzers";
159153
return false;
160154
}
155+
}
156+
157+
if (keyColumns.size() != 1) {
158+
error = TStringBuilder() << "fulltext index should have a single text key column"
159+
<< " but have " << keyColumns.size() << " of them";
160+
return false;
161+
}
162+
if (settings.columns().size() != 1) {
163+
error = TStringBuilder() << "fulltext index should have a single text key column settings"
164+
<< " but have " << settings.columns().size() << " of them";
165+
return false;
166+
}
167+
if (keyColumns.at(0) != settings.columns().at(0).column()) {
168+
error = TStringBuilder() << "fulltext index should have a single text key column " << keyColumns.at(0) << " settings"
169+
<< " but have " << settings.columns().at(0).column();
170+
return false;
171+
}
172+
173+
for (auto column : settings.columns()) {
161174
if (!ValidateSettings(column.analyzers(), error)) {
162175
return false;
163176
}
@@ -167,7 +180,7 @@ bool ValidateSettings(const Ydb::Table::FulltextIndexSettings& settings, TString
167180
return true;
168181
}
169182

170-
Ydb::Table::FulltextIndexSettings FillSettings(const TString& column, const TVector<std::pair<TString, TString>>& settings, TString& error) {
183+
Ydb::Table::FulltextIndexSettings FillSettings(const TString& keyColumn, const TVector<std::pair<TString, TString>>& settings, TString& error) {
171184
Ydb::Table::FulltextIndexSettings result;
172185
Ydb::Table::FulltextIndexSettings::Analyzers resultAnalyzers;
173186

@@ -209,11 +222,16 @@ Ydb::Table::FulltextIndexSettings FillSettings(const TString& column, const TVec
209222
{
210223
// only single-columned index is supported for now
211224
auto columnAnalyzers = result.add_columns();
212-
columnAnalyzers->set_column(column);
225+
columnAnalyzers->set_column(keyColumn);
213226
columnAnalyzers->mutable_analyzers()->CopyFrom(resultAnalyzers);
214227
}
215228

216-
ValidateSettings(result, error);
229+
{
230+
NProtoBuf::RepeatedPtrField<TString> keyColumns;
231+
TString keyColumn_ = keyColumn;
232+
keyColumns.Add(std::move(keyColumn_));
233+
ValidateSettings(keyColumns, result, error);
234+
}
217235

218236
return result;
219237
}

ydb/core/base/fulltext.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ namespace NKikimr::NFulltext {
88

99
TVector<TString> Analyze(const TString& text, const Ydb::Table::FulltextIndexSettings::Analyzers& settings);
1010

11-
bool ValidateSettings(const Ydb::Table::FulltextIndexSettings& settings, TString& error);
12-
Ydb::Table::FulltextIndexSettings FillSettings(const TString& column, const TVector<std::pair<TString, TString>>& values, TString& error);
11+
// Checks fulltext index settings against the index key columns.
// Returns false and writes a message into `error` when: the layout is missing or LAYOUT_UNSPECIFIED,
// a column settings entry has no column name or no analyzers, there is not exactly one key column,
// there is not exactly one column settings entry, the key column name differs from the settings
// column name, or the per-column analyzers fail their own validation.
bool ValidateSettings(const NProtoBuf::RepeatedPtrField<TString>& keyColumns, const Ydb::Table::FulltextIndexSettings& settings, TString& error);
12+
// Builds FulltextIndexSettings with a single column entry named `keyColumn`.
// The result is returned even when validation fails (the ValidateSettings result is not propagated),
// so callers have to inspect `error` to detect a problem.
// NOTE(review): `values` looks like a list of (setting name, setting value) pairs - confirm against the definition.
Ydb::Table::FulltextIndexSettings FillSettings(const TString& keyColumn, const TVector<std::pair<TString, TString>>& values, TString& error);
1313

1414
}

ydb/core/base/table_index.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ TClusterId SetPostingParentFlag(TClusterId parent);
8383

8484
namespace NFulltext {
8585
// TODO: support utf-8 in fulltext index
86+
// Ydb type enum value matching the TokenTypeName string ("String") declared next to it.
inline constexpr auto TokenType = Ydb::Type::STRING;
8687
inline constexpr const char* TokenTypeName = "String";
8788

8889
inline constexpr const char* TokenColumn = "__ydb_token";

ydb/core/base/ut/fulltext_ut.cpp

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,27 +10,51 @@ Y_UNIT_TEST_SUITE(NFulltext) {
1010
Ydb::Table::FulltextIndexSettings settings;
1111
TString error;
1212

13-
UNIT_ASSERT(!ValidateSettings(settings, error));
13+
NProtoBuf::RepeatedPtrField<TString> keyColumns;
14+
keyColumns.Add("text");
15+
16+
UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error));
1417
UNIT_ASSERT_VALUES_EQUAL(error, "layout should be set");
1518
settings.set_layout(Ydb::Table::FulltextIndexSettings::FLAT);
1619

17-
UNIT_ASSERT(!ValidateSettings(settings, error));
18-
UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have single column settings but have 0 of them");
20+
UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error));
21+
UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have a single text key column settings but have 0 of them");
1922
auto columnSettings = settings.add_columns();
2023

21-
UNIT_ASSERT(!ValidateSettings(settings, error));
22-
UNIT_ASSERT_VALUES_EQUAL(error, "column should be set");
24+
UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error));
25+
UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index settings should have a column name");
2326
columnSettings->set_column("text");
2427

25-
UNIT_ASSERT(!ValidateSettings(settings, error));
26-
UNIT_ASSERT_VALUES_EQUAL(error, "column analyzers should be set");
28+
UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error));
29+
UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index settings should have analyzers");
2730
auto columnAnalyzers = columnSettings->mutable_analyzers();
2831

29-
UNIT_ASSERT(!ValidateSettings(settings, error));
32+
UNIT_ASSERT(!ValidateSettings(keyColumns, settings, error));
3033
UNIT_ASSERT_VALUES_EQUAL(error, "tokenizer should be set");
3134
columnAnalyzers->set_tokenizer(Ydb::Table::FulltextIndexSettings::STANDARD);
3235

33-
UNIT_ASSERT_C(ValidateSettings(settings, error), error);
36+
{
37+
NProtoBuf::RepeatedPtrField<TString> keyColumns;
38+
UNIT_ASSERT_C(!ValidateSettings(keyColumns, settings, error), error);
39+
UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have a single text key column but have 0 of them");
40+
}
41+
42+
{
43+
NProtoBuf::RepeatedPtrField<TString> keyColumns;
44+
keyColumns.Add("text2");
45+
UNIT_ASSERT_C(!ValidateSettings(keyColumns, settings, error), error);
46+
UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have a single text key column text2 settings but have text");
47+
}
48+
49+
{
50+
NProtoBuf::RepeatedPtrField<TString> keyColumns;
51+
keyColumns.Add("text");
52+
keyColumns.Add("text");
53+
UNIT_ASSERT_C(!ValidateSettings(keyColumns, settings, error), error);
54+
UNIT_ASSERT_VALUES_EQUAL(error, "fulltext index should have a single text key column but have 2 of them");
55+
}
56+
57+
UNIT_ASSERT_C(ValidateSettings(keyColumns, settings, error), error);
3458
UNIT_ASSERT_VALUES_EQUAL(error, "");
3559
}
3660

ydb/core/protos/tx_datashard.proto

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1784,6 +1784,43 @@ message TEvPrefixKMeansResponse {
17841784
optional NKikimrIndexBuilder.TMeteringStats MeteringStats = 12;
17851785
}
17861786

1787+
// Request message for the DataShard fulltext index build scan introduced by this commit.
// NOTE(review): TEvBuildFulltextIndexResponse appears to be the matching reply - confirm in the scan handler.
message TEvBuildFulltextIndexRequest {
1788+
// Request identifier; the response message has a field with the same name and number.
optional uint64 Id = 1;
1789+
1790+
// NOTE(review): presumably the target DataShard tablet and the path of the table being indexed - confirm.
optional uint64 TabletId = 2;
1791+
optional NKikimrProto.TPathID PathId = 3;
1792+
1793+
// NOTE(review): presumably the (tx id, step) of the snapshot the scan should read - confirm.
optional uint64 SnapshotTxId = 4;
1794+
optional uint64 SnapshotStep = 5;
1795+
1796+
// Sequence number (generation, round); the response carries RequestSeqNoGeneration / RequestSeqNoRound.
optional uint64 SeqNoGeneration = 6;
1797+
optional uint64 SeqNoRound = 7;
1798+
1799+
// Fulltext index settings (layout and per-column analyzers).
optional Ydb.Table.FulltextIndexSettings Settings = 8;
1800+
1801+
optional string IndexName = 9;
1802+
1803+
// NOTE(review): NFulltext::ValidateSettings accepts exactly one key column whose name matches
// Settings.columns(0).column; presumably KeyColumns is checked that way - verify in the handler.
repeated string KeyColumns = 10;
1804+
repeated string DataColumns = 11;
1805+
1806+
optional NKikimrIndexBuilder.TIndexBuildScanSettings ScanSettings = 12;
1807+
}
1808+
1809+
// Response message for the DataShard fulltext index build scan.
// NOTE(review): appears to be the reply to TEvBuildFulltextIndexRequest - confirm in the scan handler.
message TEvBuildFulltextIndexResponse {
1810+
// Same field name and number as Id in the request; NOTE(review): presumably echoed back - confirm.
optional uint64 Id = 1;
1811+
1812+
optional uint64 TabletId = 2;
1813+
optional NKikimrProto.TPathID PathId = 3;
1814+
1815+
// NOTE(review): presumably copies of the request's SeqNoGeneration / SeqNoRound - confirm.
optional uint64 RequestSeqNoGeneration = 4;
1816+
optional uint64 RequestSeqNoRound = 5;
1817+
1818+
// Build status plus any issues attached to it.
optional NKikimrIndexBuilder.EBuildStatus Status = 6;
1819+
repeated Ydb.Issue.IssueMessage Issues = 7;
1820+
1821+
optional NKikimrIndexBuilder.TMeteringStats MeteringStats = 8;
1822+
}
1823+
17871824
message TEvCdcStreamScanRequest {
17881825
message TLimits {
17891826
optional uint32 BatchMaxBytes = 1 [default = 512000];

0 commit comments

Comments
 (0)