@@ -172,7 +172,28 @@ namespace {
172172 return false ;
173173 }
174174
175- if (settings.has_language ()) {
175+ if (settings.use_filter_snowball ()) {
176+ if (settings.use_filter_ngram () || settings.use_filter_edge_ngram ()) {
177+ error = " cannot set use_filter_snowball with use_filter_ngram or use_filter_edge_ngram at the same time" ;
178+ return false ;
179+ }
180+
181+ if (!settings.has_language ()) {
182+ error = " language required when use_filter_snowball is set" ;
183+ return false ;
184+ }
185+
186+ bool supportedLanguage = false ;
187+ for (auto ptr = sb_stemmer_list (); *ptr != nullptr ; ++ptr) {
188+ if (settings.language () == *ptr) {
189+ supportedLanguage = true ;
190+ }
191+ }
192+ if (!supportedLanguage) {
193+ error = " language is not supported by snowball" ;
194+ return false ;
195+ }
196+ } else if (settings.has_language ()) {
176197 error = " Unsupported language setting" ;
177198 return false ;
178199 }
@@ -246,7 +267,7 @@ namespace {
246267 }
247268}
248269
249- TVector<TString> Analyze (const TString& text, const Ydb::Table::FulltextIndexSettings::Analyzers& settings) {
270+ TVector<TString> Analyze (const TString& text, const Ydb::Table::FulltextIndexSettings::Analyzers& settings, struct sb_stemmer * stemmer ) {
250271 TVector<TString> tokens = Tokenize (text, settings.tokenizer ());
251272
252273 if (settings.use_filter_lowercase ()) {
@@ -268,6 +289,20 @@ TVector<TString> Analyze(const TString& text, const Ydb::Table::FulltextIndexSet
268289 }), tokens.end ());
269290 }
270291
292+ if (settings.use_filter_snowball ()) {
293+ Y_ASSERT (stemmer);
294+ for (auto & token : tokens) {
295+ const sb_symbol* stemmed = sb_stemmer_stem (
296+ stemmer,
297+ reinterpret_cast <const sb_symbol*>(token.data ()),
298+ token.size ()
299+ );
300+
301+ const size_t resultLength = sb_stemmer_length (stemmer);
302+ token = std::string (reinterpret_cast <const char *>(stemmed), resultLength);
303+ }
304+ }
305+
271306 if (settings.use_filter_ngram () || settings.use_filter_edge_ngram ()) {
272307 TVector<TString> ngrams;
273308 for (const auto & token : tokens) {
@@ -367,6 +402,8 @@ bool FillSetting(Ydb::Table::FulltextIndexSettings& settings, const TString& nam
367402 analyzers->set_filter_length_min (ParseInt32 (name, value, error));
368403 } else if (nameLower == " filter_length_max" ) {
369404 analyzers->set_filter_length_max (ParseInt32 (name, value, error));
405+ } else if (nameLower == " use_filter_snowball" ) {
406+ analyzers->set_use_filter_snowball (ParseBool (name, value, error));
370407 } else {
371408 error = TStringBuilder () << " Unknown index setting: " << name;
372409 return false ;
0 commit comments