diff --git a/be/src/olap/inverted_index_parser.cpp b/be/src/olap/inverted_index_parser.cpp index 1d892b9357dc34..7c89f02c51e089 100644 --- a/be/src/olap/inverted_index_parser.cpp +++ b/be/src/olap/inverted_index_parser.cpp @@ -17,7 +17,6 @@ #include "olap/inverted_index_parser.h" -#include "olap/rowset/segment_v2/inverted_index/char_filter/char_filter_factory.h" #include "util/string_util.h" namespace doris { diff --git a/be/src/olap/inverted_index_parser.h b/be/src/olap/inverted_index_parser.h index a276d7de4f32ab..afd6e6619a3e1d 100644 --- a/be/src/olap/inverted_index_parser.h +++ b/be/src/olap/inverted_index_parser.h @@ -85,6 +85,7 @@ const std::string INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO = "false"; const std::string INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE = "char_filter_type"; const std::string INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN = "char_filter_pattern"; const std::string INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT = "char_filter_replacement"; +const std::string INVERTED_INDEX_CHAR_FILTER_CHAR_REPLACE = "char_replace"; const std::string INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY = "ignore_above"; const std::string INVERTED_INDEX_PARSER_IGNORE_ABOVE_VALUE = "256"; diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analysis_factory_mgr.cpp b/be/src/olap/rowset/segment_v2/inverted_index/analysis_factory_mgr.cpp index 51585e5580b11d..af6442525888f2 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analysis_factory_mgr.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/analysis_factory_mgr.cpp @@ -17,10 +17,13 @@ #include "analysis_factory_mgr.h" +#include "olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter_factory.h" #include "olap/rowset/segment_v2/inverted_index/token_filter/ascii_folding_filter_factory.h" #include "olap/rowset/segment_v2/inverted_index/token_filter/lower_case_filter_factory.h" #include "olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter_factory.h" +#include 
"olap/rowset/segment_v2/inverted_index/tokenizer/basic/basic_tokenizer_factory.h" #include "olap/rowset/segment_v2/inverted_index/tokenizer/char/char_group_tokenizer_factory.h" +#include "olap/rowset/segment_v2/inverted_index/tokenizer/icu/icu_tokenizer_factory.h" #include "olap/rowset/segment_v2/inverted_index/tokenizer/keyword/keyword_tokenizer_factory.h" #include "olap/rowset/segment_v2/inverted_index/tokenizer/ngram/edge_ngram_tokenizer_factory.h" #include "olap/rowset/segment_v2/inverted_index/tokenizer/standard/standard_tokenizer_factory.h" @@ -30,6 +33,10 @@ namespace doris::segment_v2::inverted_index { void AnalysisFactoryMgr::initialise() { static std::once_flag once_flag; std::call_once(once_flag, [this]() { + // char_filter + registerFactory("char_replace", + []() { return std::make_shared(); }); + // tokenizer registerFactory("standard", []() { return std::make_shared(); }); registerFactory("keyword", []() { return std::make_shared(); }); @@ -38,6 +45,8 @@ void AnalysisFactoryMgr::initialise() { []() { return std::make_shared(); }); registerFactory("char_group", []() { return std::make_shared(); }); + registerFactory("basic", []() { return std::make_shared(); }); + registerFactory("icu", []() { return std::make_shared(); }); // token_filter registerFactory("lowercase", []() { return std::make_shared(); }); @@ -75,4 +84,7 @@ template std::shared_ptr AnalysisFactoryMgr::create AnalysisFactoryMgr::create( const std::string&, const Settings&); +template std::shared_ptr AnalysisFactoryMgr::create( + const std::string&, const Settings&); + } // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/analyzer.cpp b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/analyzer.cpp index f37b33410a33b2..7167bbd63eaa86 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/analyzer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/analyzer.cpp @@ -35,23 
+35,22 @@ #include "olap/rowset/segment_v2/inverted_index/analyzer/basic/basic_analyzer.h" #include "olap/rowset/segment_v2/inverted_index/analyzer/icu/icu_analyzer.h" #include "olap/rowset/segment_v2/inverted_index/analyzer/ik/IKAnalyzer.h" -#include "olap/rowset/segment_v2/inverted_index/char_filter/char_filter_factory.h" +#include "olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter_factory.h" #include "runtime/exec_env.h" #include "runtime/index_policy/index_policy_mgr.h" -#include "util/runtime_profile.h" namespace doris::segment_v2::inverted_index { #include "common/compile_check_begin.h" -std::unique_ptr InvertedIndexAnalyzer::create_reader( - CharFilterMap& char_filter_map) { - std::unique_ptr reader = - std::make_unique>(); +ReaderPtr InvertedIndexAnalyzer::create_reader(CharFilterMap& char_filter_map) { + ReaderPtr reader = std::make_shared>(); if (!char_filter_map.empty()) { - reader = std::unique_ptr(CharFilterFactory::create( - char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE], reader.release(), - char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN], - char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT])); + if (char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE] == + INVERTED_INDEX_CHAR_FILTER_CHAR_REPLACE) { + reader = std::make_shared( + reader, char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN], + char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT]); + } } return reader; } @@ -122,7 +121,7 @@ std::shared_ptr InvertedIndexAnalyzer::create_analyz } std::vector InvertedIndexAnalyzer::get_analyse_result( - lucene::util::Reader* reader, lucene::analysis::Analyzer* analyzer) { + ReaderPtr reader, lucene::analysis::Analyzer* analyzer) { std::vector analyse_result; std::unique_ptr token_stream(analyzer->tokenStream(L"", reader)); @@ -161,7 +160,7 @@ std::vector InvertedIndexAnalyzer::get_analyse_result( inverted_index_ctx->analyzer = analyzer.get(); auto reader = 
create_reader(inverted_index_ctx->char_filter_map); reader->init(search_str.data(), static_cast(search_str.size()), true); - return get_analyse_result(reader.get(), analyzer.get()); + return get_analyse_result(reader, analyzer.get()); } bool InvertedIndexAnalyzer::should_analyzer(const std::map& properties) { diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/analyzer.h b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/analyzer.h index 682c7cd9b52848..464d8df02cd959 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/analyzer.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/analyzer.h @@ -23,6 +23,7 @@ #include "olap/inverted_index_parser.h" #include "olap/olap_common.h" #include "olap/rowset/segment_v2/inverted_index/query/query.h" +#include "olap/rowset/segment_v2/inverted_index/util/reader.h" #include "olap/rowset/segment_v2/inverted_index_query_type.h" namespace lucene { @@ -38,12 +39,12 @@ namespace doris::segment_v2::inverted_index { class InvertedIndexAnalyzer { public: - static std::unique_ptr create_reader(CharFilterMap& char_filter_map); + static ReaderPtr create_reader(CharFilterMap& char_filter_map); static std::shared_ptr create_analyzer( const InvertedIndexCtx* inverted_index_ctx); - static std::vector get_analyse_result(lucene::util::Reader* reader, + static std::vector get_analyse_result(ReaderPtr reader, lucene::analysis::Analyzer* analyzer); static std::vector get_analyse_result( diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/basic/basic_analyzer.h b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/basic/basic_analyzer.h index b9f4f96366600b..a080d6294c43b5 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/basic/basic_analyzer.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/basic/basic_analyzer.h @@ -17,9 +17,9 @@ #pragma once -#include - -#include "basic_tokenizer.h" +#include 
"olap/rowset/segment_v2/inverted_index/token_filter/lower_case_filter.h" +#include "olap/rowset/segment_v2/inverted_index/token_stream.h" +#include "olap/rowset/segment_v2/inverted_index/tokenizer/basic/basic_tokenizer.h" namespace doris::segment_v2 { @@ -35,22 +35,47 @@ class BasicAnalyzer : public Analyzer { bool isSDocOpt() override { return true; } TokenStream* tokenStream(const TCHAR* fieldName, lucene::util::Reader* reader) override { - auto* tokenizer = _CLNEW BasicTokenizer(_lowercase, _ownReader); - tokenizer->reset(reader); - return (TokenStream*)tokenizer; + throw Exception(ErrorCode::INVERTED_INDEX_NOT_SUPPORTED, + "BasicAnalyzer::tokenStream not supported"); } TokenStream* reusableTokenStream(const TCHAR* fieldName, lucene::util::Reader* reader) override { - if (_tokenizer == nullptr) { - _tokenizer = std::make_unique(_lowercase, _ownReader); + throw Exception(ErrorCode::INVERTED_INDEX_NOT_SUPPORTED, + "BasicAnalyzer::reusableTokenStream not supported"); + } + + TokenStream* tokenStream(const TCHAR* fieldName, + const inverted_index::ReaderPtr& reader) override { + auto token_stream = create_components(); + token_stream->set_reader(reader); + token_stream->get_token_stream()->reset(); + return new inverted_index::TokenStreamWrapper(token_stream->get_token_stream()); + } + + TokenStream* reusableTokenStream(const TCHAR* fieldName, + const inverted_index::ReaderPtr& reader) override { + if (_reuse_token_stream == nullptr) { + _reuse_token_stream = create_components(); } - _tokenizer->reset(reader); - return (TokenStream*)_tokenizer.get(); + _reuse_token_stream->set_reader(reader); + return _reuse_token_stream->get_token_stream().get(); }; private: - std::unique_ptr _tokenizer; + inverted_index::TokenStreamComponentsPtr create_components() { + auto tk = std::make_shared(); + tk->initialize(inverted_index::BasicTokenizerMode::L1); + inverted_index::TokenStreamPtr ts = tk; + if (_lowercase) { + auto lower_case_filter = std::make_shared(tk); + 
lower_case_filter->initialize(); + ts = lower_case_filter; + } + return std::make_shared(tk, ts); + } + + inverted_index::TokenStreamComponentsPtr _reuse_token_stream; }; } // namespace doris::segment_v2 \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer.cpp b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer.cpp index c90b71a2e36d2f..312abd523374e6 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer.cpp @@ -17,44 +17,54 @@ #include "custom_analyzer.h" +#include "common/status.h" #include "olap/rowset/segment_v2/inverted_index/analysis_factory_mgr.h" +#include "olap/rowset/segment_v2/inverted_index/token_stream.h" #include "runtime/exec_env.h" namespace doris::segment_v2::inverted_index { CustomAnalyzer::CustomAnalyzer(Builder* builder) { _tokenizer = builder->_tokenizer; + _char_filters = builder->_char_filters; _token_filters = builder->_token_filters; } TokenStream* CustomAnalyzer::tokenStream(const TCHAR* fieldName, lucene::util::Reader* reader) { - class TokenStreamWrapper : public TokenStream { - public: - explicit TokenStreamWrapper(std::shared_ptr ts) : _impl(std::move(ts)) {} - ~TokenStreamWrapper() override = default; - - Token* next(Token* token) override { return _impl->next(token); } - void close() override { _impl->close(); } - void reset() override { _impl->reset(); } - - private: - std::shared_ptr _impl; - }; + throw Exception(ErrorCode::INVERTED_INDEX_NOT_SUPPORTED, + "CustomAnalyzer::tokenStream not supported"); +} + +TokenStream* CustomAnalyzer::reusableTokenStream(const TCHAR* fieldName, + lucene::util::Reader* reader) { + throw Exception(ErrorCode::INVERTED_INDEX_NOT_SUPPORTED, + "CustomAnalyzer::reusableTokenStream not supported"); +} + +TokenStream* CustomAnalyzer::tokenStream(const TCHAR* fieldName, const ReaderPtr& reader) { + auto r = 
init_reader(reader); auto token_stream = create_components(); - token_stream->set_reader(reader); + token_stream->set_reader(r); token_stream->get_token_stream()->reset(); return new TokenStreamWrapper(token_stream->get_token_stream()); } -TokenStream* CustomAnalyzer::reusableTokenStream(const TCHAR* fieldName, - lucene::util::Reader* reader) { +TokenStream* CustomAnalyzer::reusableTokenStream(const TCHAR* fieldName, const ReaderPtr& reader) { + auto r = init_reader(reader); if (_reuse_token_stream == nullptr) { _reuse_token_stream = create_components(); } - _reuse_token_stream->set_reader(reader); + _reuse_token_stream->set_reader(r); return _reuse_token_stream->get_token_stream().get(); } +ReaderPtr CustomAnalyzer::init_reader(ReaderPtr reader) { + for (const auto& filter : _char_filters) { + reader = filter->create(reader); + } + return reader; +} + TokenStreamComponentsPtr CustomAnalyzer::create_components() { auto tk = _tokenizer->create(); TokenStreamPtr ts = tk; @@ -69,6 +79,9 @@ CustomAnalyzerPtr CustomAnalyzer::build_custom_analyzer(const CustomAnalyzerConf throw Exception(ErrorCode::ILLEGAL_STATE, "Null configuration detected."); } CustomAnalyzer::Builder builder; + for (const auto& filter_config : config->get_char_filter_configs()) { + builder.add_char_filter(filter_config->get_name(), filter_config->get_params()); + } builder.with_tokenizer(config->get_tokenizer_config()->get_name(), config->get_tokenizer_config()->get_params()); for (const auto& filter_config : config->get_token_filter_configs()) { @@ -81,6 +94,10 @@ void CustomAnalyzer::Builder::with_tokenizer(const std::string& name, const Sett _tokenizer = AnalysisFactoryMgr::instance().create(name, params); } +void CustomAnalyzer::Builder::add_char_filter(const std::string& name, const Settings& params) { + _char_filters.push_back(AnalysisFactoryMgr::instance().create(name, params)); +} + void CustomAnalyzer::Builder::add_token_filter(const std::string& name, const Settings& params) { 
_token_filters.push_back( AnalysisFactoryMgr::instance().create(name, params)); @@ -93,7 +110,7 @@ CustomAnalyzerPtr CustomAnalyzer::Builder::build() { return std::make_shared(this); } -void TokenStreamComponents::set_reader(CL_NS(util)::Reader* reader) { +void TokenStreamComponents::set_reader(const ReaderPtr& reader) { _source->set_reader(reader); } diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer.h b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer.h index 7b3bc0444aed1e..cb7294e7e6407d 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer.h @@ -17,18 +17,14 @@ #pragma once -#include "common/exception.h" -#include "olap/rowset/segment_v2/inverted_index/analysis_factory_mgr.h" #include "olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer_config.h" +#include "olap/rowset/segment_v2/inverted_index/char_filter/char_filter_factory.h" #include "olap/rowset/segment_v2/inverted_index/setting.h" #include "olap/rowset/segment_v2/inverted_index/token_filter/token_filter_factory.h" #include "olap/rowset/segment_v2/inverted_index/tokenizer/tokenizer_factory.h" namespace doris::segment_v2::inverted_index { -class TokenStreamComponents; -using TokenStreamComponentsPtr = std::shared_ptr; - class CustomAnalyzer; using CustomAnalyzerPtr = std::shared_ptr; @@ -40,11 +36,14 @@ class CustomAnalyzer : public Analyzer { ~Builder() = default; void with_tokenizer(const std::string& name, const Settings& params); + void add_char_filter(const std::string& name, const Settings& params); void add_token_filter(const std::string& name, const Settings& params); + CustomAnalyzerPtr build(); private: TokenizerFactoryPtr _tokenizer; + std::vector _char_filters; std::vector _token_filters; friend class CustomAnalyzer; @@ -58,29 +57,20 @@ class CustomAnalyzer : public Analyzer { TokenStream* tokenStream(const TCHAR* fieldName, 
lucene::util::Reader* reader) override; TokenStream* reusableTokenStream(const TCHAR* fieldName, lucene::util::Reader* reader) override; + TokenStream* tokenStream(const TCHAR* fieldName, const ReaderPtr& reader) override; + TokenStream* reusableTokenStream(const TCHAR* fieldName, const ReaderPtr& reader) override; + static CustomAnalyzerPtr build_custom_analyzer(const CustomAnalyzerConfigPtr& config); private: + ReaderPtr init_reader(ReaderPtr reader); TokenStreamComponentsPtr create_components(); TokenizerFactoryPtr _tokenizer; + std::vector _char_filters; std::vector _token_filters; TokenStreamComponentsPtr _reuse_token_stream; }; -class TokenStreamComponents { -public: - TokenStreamComponents(TokenizerPtr tokenizer, TokenStreamPtr result) - : _source(std::move(tokenizer)), _sink(std::move(result)) {} - - void set_reader(CL_NS(util)::Reader* reader); - TokenStreamPtr get_token_stream(); - TokenizerPtr get_source(); - -private: - TokenizerPtr _source; - TokenStreamPtr _sink; -}; - } // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer_config.cpp b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer_config.cpp index f1d593ecfa53e0..161d267efb6ea5 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer_config.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer_config.cpp @@ -23,6 +23,7 @@ namespace doris::segment_v2::inverted_index { CustomAnalyzerConfig::CustomAnalyzerConfig(Builder* builder) { _tokenizer_config = builder->_tokenizer_config; + _char_filters = builder->_char_filters; _token_filters = builder->_token_filters; } @@ -30,6 +31,10 @@ ComponentConfigPtr CustomAnalyzerConfig::get_tokenizer_config() { return _tokenizer_config; } +std::vector CustomAnalyzerConfig::get_char_filter_configs() { + return _char_filters; +} + std::vector CustomAnalyzerConfig::get_token_filter_configs() { 
return _token_filters; } @@ -39,6 +44,11 @@ void CustomAnalyzerConfig::Builder::with_tokenizer_config(const std::string& nam _tokenizer_config = std::make_shared(name, params); } +void CustomAnalyzerConfig::Builder::add_char_filter_config(const std::string& name, + const Settings& params) { + _char_filters.emplace_back(std::make_shared(name, params)); +} + void CustomAnalyzerConfig::Builder::add_token_filter_config(const std::string& name, const Settings& params) { _token_filters.emplace_back(std::make_shared(name, params)); diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer_config.h b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer_config.h index 0df8507e26afa0..134d4ee0d45d78 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer_config.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer_config.h @@ -39,11 +39,13 @@ class CustomAnalyzerConfig { ~Builder() = default; void with_tokenizer_config(const std::string& name, const Settings& params); + void add_char_filter_config(const std::string& name, const Settings& params); void add_token_filter_config(const std::string& name, const Settings& params); CustomAnalyzerConfigPtr build(); private: ComponentConfigPtr _tokenizer_config; + std::vector _char_filters; std::vector _token_filters; friend class CustomAnalyzerConfig; @@ -53,10 +55,12 @@ class CustomAnalyzerConfig { ~CustomAnalyzerConfig() = default; ComponentConfigPtr get_tokenizer_config(); + std::vector get_char_filter_configs(); std::vector get_token_filter_configs(); private: ComponentConfigPtr _tokenizer_config; + std::vector _char_filters; std::vector _token_filters; }; diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/icu_analyzer.h b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/icu_analyzer.h index 072cf85bc7d814..ccf27dfc8cb17c 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/icu_analyzer.h +++ 
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/icu_analyzer.h @@ -17,9 +17,9 @@ #pragma once -#include - -#include "icu_tokenizer.h" +#include "olap/rowset/segment_v2/inverted_index/token_filter/lower_case_filter.h" +#include "olap/rowset/segment_v2/inverted_index/token_stream.h" +#include "olap/rowset/segment_v2/inverted_index/tokenizer/icu/icu_tokenizer.h" namespace doris::segment_v2 { @@ -37,25 +37,48 @@ class ICUAnalyzer : public Analyzer { void initDict(const std::string& dictPath) override { dictPath_ = dictPath; } TokenStream* tokenStream(const TCHAR* fieldName, lucene::util::Reader* reader) override { - auto* tokenizer = _CLNEW ICUTokenizer(_lowercase, _ownReader); - tokenizer->initialize(dictPath_); - tokenizer->reset(reader); - return (TokenStream*)tokenizer; + throw Exception(ErrorCode::INVERTED_INDEX_NOT_SUPPORTED, + "ICUAnalyzer::tokenStream not supported"); } TokenStream* reusableTokenStream(const TCHAR* fieldName, lucene::util::Reader* reader) override { - if (tokenizer_ == nullptr) { - tokenizer_ = std::make_unique(_lowercase, _ownReader); - tokenizer_->initialize(dictPath_); + throw Exception(ErrorCode::INVERTED_INDEX_NOT_SUPPORTED, + "ICUAnalyzer::reusableTokenStream not supported"); + } + + TokenStream* tokenStream(const TCHAR* fieldName, + const inverted_index::ReaderPtr& reader) override { + auto token_stream = create_components(); + token_stream->set_reader(reader); + token_stream->get_token_stream()->reset(); + return new inverted_index::TokenStreamWrapper(token_stream->get_token_stream()); + } + + TokenStream* reusableTokenStream(const TCHAR* fieldName, + const inverted_index::ReaderPtr& reader) override { + if (_reuse_token_stream == nullptr) { + _reuse_token_stream = create_components(); } - tokenizer_->reset(reader); - return (TokenStream*)tokenizer_.get(); + _reuse_token_stream->set_reader(reader); + return _reuse_token_stream->get_token_stream().get(); }; private: + inverted_index::TokenStreamComponentsPtr 
create_components() { + auto tk = std::make_shared(); + tk->initialize(dictPath_); + inverted_index::TokenStreamPtr ts = tk; + if (_lowercase) { + auto lower_case_filter = std::make_shared(tk); + lower_case_filter->initialize(); + ts = lower_case_filter; + } + return std::make_shared(tk, ts); + } + std::string dictPath_; - std::unique_ptr tokenizer_; + inverted_index::TokenStreamComponentsPtr _reuse_token_stream; }; } // namespace doris::segment_v2 \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_filter.h b/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_filter.h new file mode 100644 index 00000000000000..7e4c3a849f3446 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_filter.h @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include + +#include "common/exception.h" +#include "olap/rowset/segment_v2/inverted_index/util/reader.h" + +namespace doris::segment_v2::inverted_index { + +class DorisCharFilter : public lucene::util::Reader { +public: + DorisCharFilter(ReaderPtr reader) : _reader(std::move(reader)) {} + ~DorisCharFilter() override = default; + + virtual void initialize() = 0; + + int64_t position() override { + throw Exception(ErrorCode::INVERTED_INDEX_NOT_SUPPORTED, "CharFilter::position"); + } + + int64_t skip(int64_t ntoskip) override { + throw Exception(ErrorCode::INVERTED_INDEX_NOT_SUPPORTED, "CharFilter::skip"); + } + + size_t size() override { + throw Exception(ErrorCode::INVERTED_INDEX_NOT_SUPPORTED, "CharFilter::size"); + } + +protected: + ReaderPtr _reader; +}; +using CharFilterPtr = std::shared_ptr; + +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_filter_factory.h b/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_filter_factory.h index bebbea58f72d86..925f9adaf6e9ed 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_filter_factory.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_filter_factory.h @@ -17,22 +17,18 @@ #pragma once -#include "olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.h" +#include "olap/rowset/segment_v2/inverted_index/abstract_analysis_factory.h" +#include "olap/rowset/segment_v2/inverted_index/util/reader.h" -namespace doris { +namespace doris::segment_v2::inverted_index { -static const std::string INVERTED_INDEX_CHAR_FILTER_CHAR_REPLACE = "char_replace"; - -class CharFilterFactory { +class CharFilterFactory : public AbstractAnalysisFactory { public: - template - static lucene::analysis::CharFilter* create(const std::string& name, Args&&... 
args) { - DBUG_EXECUTE_IF("CharFilterFactory::create_return_nullptr", { return nullptr; }) - if (name == INVERTED_INDEX_CHAR_FILTER_CHAR_REPLACE) { - return new CharReplaceCharFilter(std::forward(args)...); - } - return nullptr; - } + CharFilterFactory() = default; + ~CharFilterFactory() override = default; + + virtual ReaderPtr create(const ReaderPtr& in) = 0; }; +using CharFilterFactoryPtr = std::shared_ptr; -} // namespace doris \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.cpp b/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.cpp index a75bef53d4554c..e2f6b663070daf 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.cpp @@ -19,16 +19,24 @@ #include -namespace doris { +namespace doris::segment_v2::inverted_index { #include "common/compile_check_begin.h" -CharReplaceCharFilter::CharReplaceCharFilter(lucene::util::Reader* in, const std::string& pattern, - const std::string& replacement) - : CharFilter(in), _replacement(replacement) { - std::for_each(pattern.begin(), pattern.end(), [this](uint8_t c) { _patterns.set(c); }); + +CharReplaceCharFilter::CharReplaceCharFilter(ReaderPtr reader, const std::string& pattern, + std::string replacement) + : DorisCharFilter(std::move(reader)), _replacement(std::move(replacement)) { + std::ranges::for_each(pattern, [this](uint8_t c) { _patterns.set(c); }); +} + +void CharReplaceCharFilter::initialize() { + if (_transformed_input.size() != 0) { + return; + } + fill(); } void CharReplaceCharFilter::init(const void* _value, int32_t _length, bool copyData) { - input_->init(_value, _length, copyData); + _reader->init(_value, _length, copyData); fill(); } @@ -41,8 +49,8 @@ int32_t 
CharReplaceCharFilter::readCopy(void* start, int32_t off, int32_t len) { } void CharReplaceCharFilter::fill() { - _buf.resize(input_->size()); - input_->readCopy(_buf.data(), 0, static_cast(_buf.size())); + _buf.resize(_reader->size()); + _reader->readCopy(_buf.data(), 0, static_cast(_buf.size())); process_pattern(_buf); _transformed_input.init(_buf.data(), static_cast(_buf.size()), false); } @@ -56,5 +64,5 @@ void CharReplaceCharFilter::process_pattern(std::string& buf) { } } -} // namespace doris #include "common/compile_check_end.h" +} // namespace doris::segment_v2::inverted_index diff --git a/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.h b/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.h index 1e5e6f5d5cedd0..082c80ffc52fde 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.h @@ -17,19 +17,19 @@ #pragma once -#include // IWYU pragma: keep -#include - #include -namespace doris { +#include "olap/rowset/segment_v2/inverted_index/char_filter/char_filter.h" + +namespace doris::segment_v2::inverted_index { -class CharReplaceCharFilter : public lucene::analysis::CharFilter { +class CharReplaceCharFilter : public DorisCharFilter { public: - CharReplaceCharFilter(lucene::util::Reader* in, const std::string& pattern, - const std::string& replacement); + CharReplaceCharFilter(ReaderPtr in, const std::string& pattern, std::string replacement); ~CharReplaceCharFilter() override = default; + void initialize() override; + void init(const void* _value, int32_t _length, bool copyData) override; int32_t read(const void** start, int32_t min, int32_t max) override; int32_t readCopy(void* start, int32_t off, int32_t len) override; @@ -47,4 +47,4 @@ class CharReplaceCharFilter : public lucene::analysis::CharFilter { lucene::util::SStringReader _transformed_input; }; -} // 
namespace doris \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter_factory.h b/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter_factory.h new file mode 100644 index 00000000000000..debdb59107176c --- /dev/null +++ b/be/src/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter_factory.h @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include "olap/rowset/segment_v2/inverted_index/char_filter/char_filter_factory.h" +#include "olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter.h" + +namespace doris::segment_v2::inverted_index { + +static const std::string CHAR_REPLACE_PATTERN = "pattern"; +static const std::string CHAR_REPLACE_REPLACEMENT = "replacement"; + +static const std::string CHAR_REPLACE_DEFAULT_PATTERN = ",._"; + +class CharReplaceCharFilterFactory : public CharFilterFactory { +public: + CharReplaceCharFilterFactory() = default; + ~CharReplaceCharFilterFactory() override = default; + + void initialize(const Settings& settings) override { + _pattern = settings.get_string(CHAR_REPLACE_PATTERN, CHAR_REPLACE_DEFAULT_PATTERN); + if (_pattern.empty()) { + throw Exception(ErrorCode::INVALID_ARGUMENT, + "Missing '${CHAR_REPLACE_PATTERN}' for char_replace filter type"); + } + for (char ch : _pattern) { + unsigned int uc = static_cast(ch); + if (uc > 255) { + throw Exception(ErrorCode::INVALID_ARGUMENT, + "Invalid '${CHAR_REPLACE_PATTERN}' for char_replace " + "filter type: each char must " + "be in [0,255]"); + } + } + _replacement = settings.get_string(CHAR_REPLACE_REPLACEMENT, " "); + if (_replacement.size() != 1) { + throw Exception(ErrorCode::INVALID_ARGUMENT, + "Invalid '${CHAR_REPLACE_REPLACEMENT}' for char_replace " + "filter type: must be exactly 1 byte"); + } + unsigned int rep = static_cast(_replacement[0]); + if (rep > 255) { + throw Exception(ErrorCode::INVALID_ARGUMENT, + "Invalid '${CHAR_REPLACE_REPLACEMENT}' for char_replace " + "filter type: must be in [0,255]"); + } + } + + ReaderPtr create(const ReaderPtr& reader) override { + auto r = std::make_shared(reader, _pattern, _replacement); + r->initialize(); + return r; + } + +private: + std::string _pattern; + std::string _replacement; +}; + +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/setting.h 
b/be/src/olap/rowset/segment_v2/inverted_index/setting.h index e06deca5e4f1d6..51782ab0b2de5d 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/setting.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/setting.h @@ -22,8 +22,7 @@ #include #include #include -#include -#include +#include #include #include @@ -77,25 +76,65 @@ class Settings { return default_value; } - std::string get_string(const std::string& key) const { + std::string get_string(const std::string& key, const std::string& default_value = "") const { auto it = _args.find(key); if (it != _args.end()) { return it->second; } - return ""; + return default_value; } std::vector get_entry_list(const std::string& key) const { + static const boost::regex sep(R"((?<=\])\s*,\s*(?=\[))"); std::vector lists; auto it = _args.find(key); if (it != _args.end()) { - static std::regex pattern(R"(\[([^\]]+)\])"); - std::smatch match; - std::sregex_iterator iter(it->second.begin(), it->second.end(), pattern); - std::sregex_iterator end; - for (; iter != end; ++iter) { - if (iter->size() > 1) { - lists.emplace_back((*iter)[1].str()); + std::string trimmed_input = boost::algorithm::trim_copy(it->second); + if (trimmed_input.empty()) { + return lists; + } + + auto validate_single = [&](const std::string& item, const std::string& prefix) { + if (item.size() < 2 || item.front() != '[' || item.back() != ']') { + throw Exception(ErrorCode::INVALID_ARGUMENT, + prefix + key + " must be enclosed in []"); + } + int depth = 0; + for (size_t i = 0; i + 1 < item.size(); ++i) { + char c = item[i]; + if (c == '[') { + ++depth; + } else if (c == ']') { + --depth; + if (depth == 0) { + throw Exception(ErrorCode::INVALID_ARGUMENT, + prefix + key + " must be enclosed in []"); + } + } + } + }; + + if (boost::regex_search(trimmed_input, sep)) { + boost::sregex_token_iterator regex_it(trimmed_input.begin(), trimmed_input.end(), + sep, -1); + boost::sregex_token_iterator end; + for (; regex_it != end; ++regex_it) { + std::string item = 
boost::algorithm::trim_copy(regex_it->str()); + validate_single(item, "Each item in "); + std::string content = item.substr(1, item.size() - 2); + if (!content.empty()) { + lists.emplace_back(content); + } + } + } else { + if (trimmed_input.size() < 2 || trimmed_input.front() != '[' || + trimmed_input.back() != ']') { + throw Exception(ErrorCode::INVALID_ARGUMENT, + "Item in " + key + " must be enclosed in []"); + } + std::string content = trimmed_input.substr(1, trimmed_input.size() - 2); + if (!content.empty()) { + lists.emplace_back(content); } } } diff --git a/be/src/olap/rowset/segment_v2/inverted_index/token_filter/token_filter.h b/be/src/olap/rowset/segment_v2/inverted_index/token_filter/token_filter.h index 41631ae6e44a19..0271db5a39292e 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/token_filter/token_filter.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/token_filter/token_filter.h @@ -19,8 +19,6 @@ #include "olap/rowset/segment_v2/inverted_index/token_stream.h" -using TokenStreamPtr = std::shared_ptr; - namespace doris::segment_v2::inverted_index { class DorisTokenFilter : public TokenFilter, public DorisTokenStream { diff --git a/be/src/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter_factory.h b/be/src/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter_factory.h index a125150bcadd89..9e0e60b73a0ed9 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter_factory.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter_factory.h @@ -17,6 +17,8 @@ #pragma once +#include + #include "olap/rowset/segment_v2/inverted_index/setting.h" #include "token_filter_factory.h" #include "word_delimiter_filter.h" diff --git a/be/src/olap/rowset/segment_v2/inverted_index/token_stream.h b/be/src/olap/rowset/segment_v2/inverted_index/token_stream.h index c2850f779922a3..b352a1f2cc7b00 100644 --- 
a/be/src/olap/rowset/segment_v2/inverted_index/token_stream.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/token_stream.h @@ -21,19 +21,22 @@ #include #include -#include #include "CLucene.h" #include "CLucene/analysis/AnalysisHeader.h" #include "common/cast_set.h" -#include "common/exception.h" -#include "common/logging.h" +#include "olap/rowset/segment_v2/inverted_index/util/reader.h" using namespace lucene::analysis; namespace doris::segment_v2::inverted_index { #include "common/compile_check_begin.h" +class DorisTokenizer; +using TokenizerPtr = std::shared_ptr; + +using TokenStreamPtr = std::shared_ptr; + /** * All custom tokenizers and token_filters must use the following functions * to set token information. Using these unified set methods helps avoid @@ -59,5 +62,33 @@ class DorisTokenStream { void set_position_increment(Token* t, int32_t pos) { t->setPositionIncrement(pos); } }; +class TokenStreamWrapper : public TokenStream { +public: + explicit TokenStreamWrapper(std::shared_ptr ts) : _impl(std::move(ts)) {} + ~TokenStreamWrapper() override = default; + + Token* next(Token* token) override { return _impl->next(token); } + void close() override { _impl->close(); } + void reset() override { _impl->reset(); } + +private: + std::shared_ptr _impl; +}; + +class TokenStreamComponents { +public: + TokenStreamComponents(TokenizerPtr tokenizer, TokenStreamPtr result) + : _source(std::move(tokenizer)), _sink(std::move(result)) {} + + void set_reader(const ReaderPtr& reader); + TokenStreamPtr get_token_stream(); + TokenizerPtr get_source(); + +private: + TokenizerPtr _source; + TokenStreamPtr _sink; +}; +using TokenStreamComponentsPtr = std::shared_ptr; + }; // namespace doris::segment_v2::inverted_index #include "common/compile_check_end.h" \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/basic/basic_tokenizer.cpp b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/basic/basic_tokenizer.cpp similarity index 73% 
rename from be/src/olap/rowset/segment_v2/inverted_index/analyzer/basic/basic_tokenizer.cpp rename to be/src/olap/rowset/segment_v2/inverted_index/tokenizer/basic/basic_tokenizer.cpp index 0679fdbdd26da8..2ac699dae3fffb 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/basic/basic_tokenizer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/basic/basic_tokenizer.cpp @@ -19,7 +19,7 @@ #include -namespace doris::segment_v2 { +namespace doris::segment_v2::inverted_index { #include "common/compile_check_begin.h" #define IS_IN_RANGE(c, start, end) ((uint32_t)((c) - (start)) <= ((end) - (start))) @@ -29,14 +29,12 @@ namespace doris::segment_v2 { IS_IN_RANGE(c, 0x20000, 0x2A6DF) || IS_IN_RANGE(c, 0x2A700, 0x2EBEF) || \ IS_IN_RANGE(c, 0x30000, 0x3134A)) -BasicTokenizer::BasicTokenizer() { - this->lowercase = false; - this->ownReader = false; +BasicTokenizer::BasicTokenizer(bool own_reader) { + this->ownReader = own_reader; } -BasicTokenizer::BasicTokenizer(bool lower_case, bool own_reader) : BasicTokenizer() { - this->lowercase = lower_case; - this->ownReader = own_reader; +void BasicTokenizer::initialize(BasicTokenizerMode mode) { + _mode = mode; } Token* BasicTokenizer::next(Token* token) { @@ -50,21 +48,28 @@ Token* BasicTokenizer::next(Token* token) { return token; } -void BasicTokenizer::reset(lucene::util::Reader* reader) { +void BasicTokenizer::reset() { + DorisTokenizer::reset(); + _buffer_index = 0; _data_len = 0; _tokens_text.clear(); - _buffer.resize(reader->size()); - size_t numRead = reader->readCopy(_buffer.data(), 0, static_cast(_buffer.size())); + _buffer.resize(_in->size()); + size_t numRead = _in->readCopy(_buffer.data(), 0, static_cast(_buffer.size())); (void)numRead; assert(_buffer.size() == numRead); - cut(); + if (_mode == BasicTokenizerMode::L1) { + cut(); + } else if (_mode == BasicTokenizerMode::L2) { + cut(); + } _data_len = static_cast(_tokens_text.size()); } +template void BasicTokenizer::cut() { auto* s = 
(uint8_t*)_buffer.data(); auto length = static_cast(_buffer.size()); @@ -97,7 +102,15 @@ void BasicTokenizer::cut() { continue; } - if (IS_CHINESE_CHAR(c)) { + if constexpr (mode == BasicTokenizerMode::L1) { + if (IS_CHINESE_CHAR(c)) { + const int32_t len = i - prev_i; + _tokens_text.emplace_back(reinterpret_cast(s + prev_i), len); + } + } else if constexpr (mode == BasicTokenizerMode::L2) { + if (u_hasBinaryProperty(c, UCHAR_WHITE_SPACE)) { + continue; + } const int32_t len = i - prev_i; _tokens_text.emplace_back(reinterpret_cast(s + prev_i), len); } @@ -106,4 +119,4 @@ void BasicTokenizer::cut() { } #include "common/compile_check_end.h" -} // namespace doris::segment_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/basic/basic_tokenizer.h b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/basic/basic_tokenizer.h similarity index 65% rename from be/src/olap/rowset/segment_v2/inverted_index/analyzer/basic/basic_tokenizer.h rename to be/src/olap/rowset/segment_v2/inverted_index/tokenizer/basic/basic_tokenizer.h index e07a5e37d78a9a..e317de55fa81d5 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/basic/basic_tokenizer.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/basic/basic_tokenizer.h @@ -17,32 +17,38 @@ #pragma once -#include - -#include "CLucene.h" -#include "CLucene/analysis/AnalysisHeader.h" -#include "CLucene/analysis/icu/ICUCommon.h" +#include "olap/rowset/segment_v2/inverted_index/tokenizer/tokenizer.h" using namespace lucene::analysis; -namespace doris::segment_v2 { +namespace doris::segment_v2::inverted_index { + +enum class BasicTokenizerMode { + L1 = 1, // English + numbers + Chinese tokenization + L2 = 2 // L1 + all Unicode characters tokenized +}; -class BasicTokenizer : public Tokenizer { +class BasicTokenizer : public DorisTokenizer { public: - BasicTokenizer(); - BasicTokenizer(bool 
lowercase, bool ownReader); + BasicTokenizer() = default; + BasicTokenizer(bool own_reader); ~BasicTokenizer() override = default; + void initialize(BasicTokenizerMode mode); + Token* next(Token* token) override; - void reset(lucene::util::Reader* reader) override; + void reset() override; +private: + template void cut(); -private: int32_t _buffer_index = 0; int32_t _data_len = 0; std::string _buffer; std::vector _tokens_text; + + BasicTokenizerMode _mode = BasicTokenizerMode::L1; }; -} // namespace doris::segment_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/basic/basic_tokenizer_factory.h b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/basic/basic_tokenizer_factory.h new file mode 100644 index 00000000000000..58aa43dbc06762 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/basic/basic_tokenizer_factory.h @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include "basic_tokenizer.h" +#include "common/exception.h" +#include "olap/rowset/segment_v2/inverted_index/tokenizer/tokenizer_factory.h" + +namespace doris::segment_v2::inverted_index { + +class BasicTokenizerFactory : public TokenizerFactory { +public: + BasicTokenizerFactory() = default; + ~BasicTokenizerFactory() override = default; + + void initialize(const Settings& settings) override { + int32_t mode = settings.get_int("mode", static_cast(BasicTokenizerMode::L1)); + if (mode < 1 || mode > 2) { + throw Exception(ErrorCode::INVALID_ARGUMENT, "Invalid mode for basic tokenizer: {}", + mode); + } + _mode = static_cast(mode); + } + + TokenizerPtr create() override { + auto tokenzier = std::make_shared(); + tokenzier->initialize(_mode); + return tokenzier; + } + +private: + BasicTokenizerMode _mode = BasicTokenizerMode::L1; +}; + +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/break_iterator_wrapper.cpp b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/break_iterator_wrapper.cpp similarity index 97% rename from be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/break_iterator_wrapper.cpp rename to be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/break_iterator_wrapper.cpp index 50094e54f7bf6c..0ed63c28a152e6 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/break_iterator_wrapper.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/break_iterator_wrapper.cpp @@ -25,7 +25,7 @@ #include "icu_common.h" #include "icu_tokenizer_config.h" -namespace doris::segment_v2 { +namespace doris::segment_v2::inverted_index { icu::UnicodeSet BreakIteratorWrapper::EMOJI_RK; icu::UnicodeSet BreakIteratorWrapper::EMOJI; @@ -104,4 +104,4 @@ void BreakIteratorWrapper::set_text(const UChar* text, int32_t start, int32_t le status_ = UBRK_WORD_NONE; } -} // namespace doris::segment_v2 \ No newline at end 
of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/break_iterator_wrapper.h b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/break_iterator_wrapper.h similarity index 94% rename from be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/break_iterator_wrapper.h rename to be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/break_iterator_wrapper.h index dea60d1d1f7fad..554b02c9eaf4f0 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/break_iterator_wrapper.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/break_iterator_wrapper.h @@ -25,7 +25,7 @@ #include "icu_common.h" -namespace doris::segment_v2 { +namespace doris::segment_v2::inverted_index { class BreakIteratorWrapper { public: @@ -51,4 +51,4 @@ class BreakIteratorWrapper { }; using BreakIteratorWrapperPtr = std::unique_ptr; -} // namespace doris::segment_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/composite_break_iterator.cpp b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/composite_break_iterator.cpp similarity index 97% rename from be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/composite_break_iterator.cpp rename to be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/composite_break_iterator.cpp index e178ad35c13917..5a4d56d11e6cc0 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/composite_break_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/composite_break_iterator.cpp @@ -21,7 +21,7 @@ #include -namespace doris::segment_v2 { +namespace doris::segment_v2::inverted_index { CompositeBreakIterator::CompositeBreakIterator(const ICUTokenizerConfigPtr& config) : config_(config) { @@ -80,4 +80,4 @@ BreakIteratorWrapper* 
CompositeBreakIterator::get_break_iterator(int32_t scriptC return word_breakers_[scriptCode].get(); } -} // namespace doris::segment_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/composite_break_iterator.h b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/composite_break_iterator.h similarity index 94% rename from be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/composite_break_iterator.h rename to be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/composite_break_iterator.h index 8599be88dc2ce1..4d5e2a6b4d6bea 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/composite_break_iterator.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/composite_break_iterator.h @@ -29,7 +29,7 @@ #include "icu_tokenizer_config.h" #include "script_iterator.h" -namespace doris::segment_v2 { +namespace doris::segment_v2::inverted_index { class CompositeBreakIterator { public: @@ -55,4 +55,4 @@ class CompositeBreakIterator { }; using CompositeBreakIteratorPtr = std::unique_ptr; -} // namespace doris::segment_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/default_icu_tokenizer_config.cpp b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/default_icu_tokenizer_config.cpp similarity index 98% rename from be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/default_icu_tokenizer_config.cpp rename to be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/default_icu_tokenizer_config.cpp index dfbcf2dcdf65a0..4d9d8d9b2a73c1 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/default_icu_tokenizer_config.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/default_icu_tokenizer_config.cpp @@ -24,7 +24,7 @@ #include 
#include -namespace doris::segment_v2 { +namespace doris::segment_v2::inverted_index { BreakIteratorPtr DefaultICUTokenizerConfig::cjk_break_iterator_; BreakIteratorPtr DefaultICUTokenizerConfig::default_break_iterator_; @@ -125,4 +125,4 @@ void DefaultICUTokenizerConfig::read_break_iterator(BreakIteratorPtr& rbbi, } } -} // namespace doris::segment_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/default_icu_tokenizer_config.h b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/default_icu_tokenizer_config.h similarity index 94% rename from be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/default_icu_tokenizer_config.h rename to be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/default_icu_tokenizer_config.h index 6500cf230ebb03..21e9359e5a3288 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/default_icu_tokenizer_config.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/default_icu_tokenizer_config.h @@ -19,7 +19,7 @@ #include "icu_tokenizer_config.h" -namespace doris::segment_v2 { +namespace doris::segment_v2::inverted_index { class DefaultICUTokenizerConfig : public ICUTokenizerConfig { public: @@ -41,4 +41,4 @@ class DefaultICUTokenizerConfig : public ICUTokenizerConfig { bool myanmar_as_words_ = false; }; -} // namespace doris::segment_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/icu_common.h b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/icu_common.h similarity index 93% rename from be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/icu_common.h rename to be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/icu_common.h index 1cdffab48d3dd5..0066e26ec9b243 100644 --- 
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/icu_common.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/icu_common.h @@ -31,7 +31,7 @@ #include "unicode/utext.h" #include "unicode/utf8.h" -namespace doris::segment_v2 { +namespace doris::segment_v2::inverted_index { using BreakIteratorPtr = std::unique_ptr; @@ -45,4 +45,4 @@ struct UTextDeleter { using UTextPtr = std::unique_ptr; -} // namespace doris::segment_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/icu_tokenizer.cpp b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/icu_tokenizer.cpp similarity index 84% rename from be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/icu_tokenizer.cpp rename to be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/icu_tokenizer.cpp index 670ae6c2d08d38..e8723ff077e5fd 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/icu_tokenizer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/icu_tokenizer.cpp @@ -22,18 +22,15 @@ #include #include -namespace doris::segment_v2 { +namespace doris::segment_v2::inverted_index { #include "common/compile_check_begin.h" -ICUTokenizer::ICUTokenizer() { - this->lowercase = false; - this->ownReader = false; +ICUTokenizer::ICUTokenizer() { config_ = std::make_shared(true, true); breaker_ = std::make_unique(config_); } -ICUTokenizer::ICUTokenizer(bool lower_case, bool own_reader) : ICUTokenizer() { - this->lowercase = lower_case; +ICUTokenizer::ICUTokenizer(bool own_reader) : ICUTokenizer() { this->ownReader = own_reader; } @@ -69,9 +66,10 @@ Token* ICUTokenizer::next(Token* token) { return token; } -void ICUTokenizer::reset(lucene::util::Reader* reader) { +void ICUTokenizer::reset() { + DorisTokenizer::reset(); const char* buf = nullptr; - int32_t len = reader->read((const void**)&buf, 0, static_cast(reader->size())); + 
int32_t len = _in->read((const void**)&buf, 0, static_cast(_in->size())); buffer_ = icu::UnicodeString::fromUTF8(icu::StringPiece(buf, len)); if (!buffer_.isEmpty() && buffer_.isBogus()) { _CLTHROWT(CL_ERR_Runtime, "Failed to convert UTF-8 string to UnicodeString."); @@ -79,5 +77,5 @@ void ICUTokenizer::reset(lucene::util::Reader* reader) { breaker_->set_text(buffer_.getBuffer(), 0, buffer_.length()); } -} // namespace doris::segment_v2 -#include "common/compile_check_end.h" \ No newline at end of file +#include "common/compile_check_end.h" +} // namespace doris::segment_v2::inverted_index diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/icu_tokenizer.h b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/icu_tokenizer.h similarity index 82% rename from be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/icu_tokenizer.h rename to be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/icu_tokenizer.h index d11d0c67ed6b7c..bad250ea6e866f 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/icu_tokenizer.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/icu_tokenizer.h @@ -19,25 +19,24 @@ #include -#include "CLucene.h" -#include "CLucene/analysis/AnalysisHeader.h" #include "composite_break_iterator.h" #include "default_icu_tokenizer_config.h" #include "icu_common.h" +#include "olap/rowset/segment_v2/inverted_index/tokenizer/tokenizer.h" using namespace lucene::analysis; -namespace doris::segment_v2 { +namespace doris::segment_v2::inverted_index { -class ICUTokenizer : public Tokenizer { +class ICUTokenizer : public DorisTokenizer { public: ICUTokenizer(); - ICUTokenizer(bool lowercase, bool ownReader); + ICUTokenizer(bool ownReader); ~ICUTokenizer() override = default; void initialize(const std::string& dictPath); Token* next(Token* token) override; - void reset(lucene::util::Reader* reader) override; + void reset() override; private: std::string utf8Str_; @@ -47,4 +46,4 @@ class ICUTokenizer : 
public Tokenizer { CompositeBreakIteratorPtr breaker_; }; -} // namespace doris::segment_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/icu_tokenizer_config.h b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/icu_tokenizer_config.h similarity index 93% rename from be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/icu_tokenizer_config.h rename to be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/icu_tokenizer_config.h index dd7b743e74b944..af3a1f3bee36e3 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/icu_tokenizer_config.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/icu_tokenizer_config.h @@ -19,7 +19,7 @@ #include "icu_common.h" -namespace doris::segment_v2 { +namespace doris::segment_v2::inverted_index { class ICUTokenizerConfig { public: @@ -34,4 +34,4 @@ class ICUTokenizerConfig { }; using ICUTokenizerConfigPtr = std::shared_ptr; -} // namespace doris::segment_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/icu_tokenizer_factory.h b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/icu_tokenizer_factory.h new file mode 100644 index 00000000000000..f750d652ad7f9a --- /dev/null +++ b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/icu_tokenizer_factory.h @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "icu_tokenizer.h" +#include "olap/rowset/segment_v2/inverted_index/tokenizer/tokenizer_factory.h" + +namespace doris::segment_v2::inverted_index { + +class ICUTokenizerFactory : public TokenizerFactory { +public: + ICUTokenizerFactory() = default; + ~ICUTokenizerFactory() override = default; + + void initialize(const Settings& settings) override {} + + TokenizerPtr create() override { + auto tokenizer = std::make_shared(); + tokenizer->initialize(config::inverted_index_dict_path + "/icu"); + return tokenizer; + } +}; + +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/script_iterator.cpp b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/script_iterator.cpp similarity index 97% rename from be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/script_iterator.cpp rename to be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/script_iterator.cpp index 7fee3055d3bc87..c742991d1f07df 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/script_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/script_iterator.cpp @@ -22,7 +22,7 @@ #include #include -namespace doris::segment_v2 { +namespace doris::segment_v2::inverted_index { std::vector ScriptIterator::k_basic_latin(128); @@ -118,4 +118,4 @@ bool ScriptIterator::is_combining_mark(UChar32 codepoint) { type == U_ENCLOSING_MARK); } -} // namespace doris::segment_v2 \ No newline at end of file +} // namespace 
doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/script_iterator.h b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/script_iterator.h similarity index 95% rename from be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/script_iterator.h rename to be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/script_iterator.h index bc93eea8670409..3db78d25c71891 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/icu/script_iterator.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/icu/script_iterator.h @@ -25,7 +25,7 @@ #include "icu_common.h" -namespace doris::segment_v2 { +namespace doris::segment_v2::inverted_index { class ScriptIterator { public: @@ -61,4 +61,4 @@ class ScriptIterator { }; using ScriptIteratorPtr = std::unique_ptr; -} // namespace doris::segment_v2 \ No newline at end of file +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/standard/standard_tokenizer_impl.h b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/standard/standard_tokenizer_impl.h index b707e9add33ff6..2b27f6b4756a1f 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/standard/standard_tokenizer_impl.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/standard/standard_tokenizer_impl.h @@ -24,8 +24,7 @@ #include #include -#include "CLucene.h" -#include "CLucene/analysis/AnalysisHeader.h" +#include "olap/rowset/segment_v2/inverted_index/util/reader.h" namespace doris::segment_v2::inverted_index { #include "common/compile_check_begin.h" @@ -168,7 +167,7 @@ class StandardTokenizerImpl { return {_zz_buffer.data() + _zz_start_read, (size_t)(_zz_marked_pos - _zz_start_read)}; } - inline void yyreset(CL_NS(util)::Reader* reader) { + inline void yyreset(const ReaderPtr& reader) { _zz_reader = reader; _zz_at_eof = false; _zz_current_pos = 
0; @@ -283,7 +282,7 @@ class StandardTokenizerImpl { static const int32_t ZZ_BUFFERSIZE = 255; - CL_NS(util)::Reader* _zz_reader = nullptr; + ReaderPtr _zz_reader; std::string _zz_buffer; int32_t _zz_state = 0; diff --git a/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/tokenizer.h b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/tokenizer.h index 6a7119b5fc56aa..8b7898f833702d 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/tokenizer.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/tokenizer.h @@ -22,8 +22,7 @@ #include #include "olap/rowset/segment_v2/inverted_index/token_stream.h" - -using TokenStreamPtr = std::shared_ptr; +#include "olap/rowset/segment_v2/inverted_index/util/reader.h" namespace doris::segment_v2::inverted_index { @@ -32,7 +31,7 @@ class DorisTokenizer : public Tokenizer, public DorisTokenStream { DorisTokenizer() = default; ~DorisTokenizer() override = default; - void set_reader(CL_NS(util)::Reader* in) { + void set_reader(const ReaderPtr& in) { if (in == nullptr) { throw Exception(ErrorCode::INVALID_ARGUMENT, "reader must not be null"); } @@ -44,8 +43,8 @@ class DorisTokenizer : public Tokenizer, public DorisTokenStream { void reset() override { _in = _in_pending; }; protected: - CL_NS(util)::Reader* _in = nullptr; - CL_NS(util)::Reader* _in_pending = nullptr; + ReaderPtr _in; + ReaderPtr _in_pending; }; using TokenizerPtr = std::shared_ptr; diff --git a/be/src/olap/rowset/segment_v2/inverted_index/util/reader.h b/be/src/olap/rowset/segment_v2/inverted_index/util/reader.h new file mode 100644 index 00000000000000..27428f491b470b --- /dev/null +++ b/be/src/olap/rowset/segment_v2/inverted_index/util/reader.h @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "CLucene.h" +#include "CLucene/util/CLStreams.h" + +namespace doris::segment_v2::inverted_index { + +using ReaderPtr = std::shared_ptr; + +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp index 3a862367cad29b..13aa3356f2daae 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp @@ -22,7 +22,6 @@ #include "olap/rowset/segment_v2/inverted_index_common.h" #include "olap/rowset/segment_v2/inverted_index_fs_directory.h" #include "olap/tablet_schema.h" -#include "olap/types.h" #include "util/faststring.h" namespace doris::segment_v2 { @@ -110,8 +109,8 @@ Status InvertedIndexColumnWriter::init_bkd_index() { } template -Result> -InvertedIndexColumnWriter::create_char_string_reader(CharFilterMap& char_filter_map) { +Result InvertedIndexColumnWriter::create_char_string_reader( + CharFilterMap& char_filter_map) { try { return inverted_index::InvertedIndexAnalyzer::create_reader(char_filter_map); } catch (CLuceneError& e) { @@ -339,7 +338,7 @@ void InvertedIndexColumnWriter::new_char_token_stream(const char* s, _CLTHROWA(CL_ERR_UnsupportedOperation, "UnsupportedOperationException: CLStream::init"); }) - auto* stream = 
_analyzer->reusableTokenStream(field->name(), _char_string_reader.get()); + auto* stream = _analyzer->reusableTokenStream(field->name(), _char_string_reader); field->setValue(stream); } @@ -408,6 +407,7 @@ Status InvertedIndexColumnWriter::add_array_values(size_t field_size return Status::InternalError("index writer is null in inverted index writer"); } size_t start_off = 0; + std::vector keep_readers; for (size_t i = 0; i < count; ++i) { // nullmap & value ptr-array may not from offsets[i] because olap_convertor make offsets accumulate from _base_offset which may not is 0, but nullmap & value in this segment is from 0, we only need // every single array row element size to go through the nullmap & value ptr-array, and also can go through the every row in array to keep with _rid++ @@ -447,15 +447,13 @@ Status InvertedIndexColumnWriter::add_array_values(size_t field_size // in this case stream need to delete after add_document, because the // stream can not reuse for different field bool own_token_stream = true; - bool own_reader = true; - std::unique_ptr char_string_reader = DORIS_TRY( + ReaderPtr char_string_reader = DORIS_TRY( create_char_string_reader(_inverted_index_ctx->char_filter_map)); char_string_reader->init(v->get_data(), cast_set(v->get_size()), false); - _analyzer->set_ownReader(own_reader); - ts = _analyzer->tokenStream(new_field->name(), - char_string_reader.release()); + ts = _analyzer->tokenStream(new_field->name(), char_string_reader); new_field->setValue(ts, own_token_stream); + keep_readers.emplace_back(std::move(char_string_reader)); } else { new_field_char_value(v->get_data(), v->get_size(), new_field.get()); } @@ -507,6 +505,7 @@ Status InvertedIndexColumnWriter::add_array_values(size_t field_size _doc->clear(); } _rid++; + keep_readers.clear(); } } else if constexpr (field_is_numeric_type(field_type)) { size_t start_off = 0; diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.h 
b/be/src/olap/rowset/segment_v2/inverted_index_writer.h index 361334c09bc322..5835b09b0d6c22 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_writer.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.h @@ -28,6 +28,7 @@ #include "olap/rowset/segment_v2/common.h" #include "olap/rowset/segment_v2/index_file_writer.h" #include "olap/rowset/segment_v2/index_writer.h" +#include "olap/rowset/segment_v2/inverted_index/util/reader.h" namespace doris { @@ -38,6 +39,8 @@ struct CppTypeTraits; namespace segment_v2 { +using namespace doris::segment_v2::inverted_index; + template class InvertedIndexColumnWriter : public IndexColumnWriter { public: @@ -50,8 +53,7 @@ class InvertedIndexColumnWriter : public IndexColumnWriter { Status init() override; void close_on_error() override; Status init_bkd_index(); - Result> create_char_string_reader( - CharFilterMap& char_filter_map); + Result create_char_string_reader(CharFilterMap& char_filter_map); Status open_index_directory(); std::unique_ptr create_index_writer(); Status create_field(lucene::document::Field** field); @@ -93,7 +95,7 @@ class InvertedIndexColumnWriter : public IndexColumnWriter { std::unique_ptr _index_writer = nullptr; std::shared_ptr _analyzer = nullptr; std::unique_ptr _similarity = nullptr; - std::unique_ptr _char_string_reader = nullptr; + ReaderPtr _char_string_reader = nullptr; std::shared_ptr _bkd_writer = nullptr; InvertedIndexCtxSPtr _inverted_index_ctx = nullptr; const KeyCoder* _value_key_coder; diff --git a/be/src/runtime/index_policy/index_policy_mgr.cpp b/be/src/runtime/index_policy/index_policy_mgr.cpp index f032c976999a84..035dd19f35e802 100644 --- a/be/src/runtime/index_policy/index_policy_mgr.cpp +++ b/be/src/runtime/index_policy/index_policy_mgr.cpp @@ -130,44 +130,66 @@ segment_v2::inverted_index::CustomAnalyzerPtr IndexPolicyMgr::get_policy_by_name builder.with_tokenizer_config(tokenzier_name, {}); } + // Process char filters + process_filter_configs(index_policy_analyzer, 
PROP_CHAR_FILTER, "char filter", + [&builder](const std::string& name, + const segment_v2::inverted_index::Settings& settings) { + builder.add_char_filter_config(name, settings); + }); + // Process token filters - auto token_filter_it = index_policy_analyzer.properties.find(PROP_TOKEN_FILTER); - if (token_filter_it != index_policy_analyzer.properties.end()) { - std::vector token_filter_strs; - boost::split(token_filter_strs, token_filter_it->second, boost::is_any_of(",")); - - for (auto& filter_name : token_filter_strs) { - boost::trim(filter_name); - if (filter_name.empty()) { - continue; - } + process_filter_configs(index_policy_analyzer, PROP_TOKEN_FILTER, "token filter", + [&builder](const std::string& name, + const segment_v2::inverted_index::Settings& settings) { + builder.add_token_filter_config(name, settings); + }); - if (_name_to_id.contains(filter_name)) { - // Nested token filter policy - const auto& filter_policy = _policys[_name_to_id[filter_name]]; - auto type_it = filter_policy.properties.find(PROP_TYPE); - if (type_it == filter_policy.properties.end()) { - throw Exception(ErrorCode::INVALID_ARGUMENT, - "Invalid token filter configuration in policy: " + filter_name); - } + auto custom_analyzer_config = builder.build(); + return segment_v2::inverted_index::CustomAnalyzer::build_custom_analyzer( + custom_analyzer_config); +} + +void IndexPolicyMgr::process_filter_configs( + const TIndexPolicy& index_policy_analyzer, const std::string& prop_name, + const std::string& error_prefix, + std::function + add_config_func) { + auto filter_it = index_policy_analyzer.properties.find(prop_name); + if (filter_it == index_policy_analyzer.properties.end()) { + return; + } + + std::vector filter_strs; + boost::split(filter_strs, filter_it->second, boost::is_any_of(",")); - segment_v2::inverted_index::Settings settings; - for (const auto& prop : filter_policy.properties) { - if (prop.first != PROP_TYPE) { - settings.set(prop.first, prop.second); - } + for (auto& 
filter_name : filter_strs) { + boost::trim(filter_name); + if (filter_name.empty()) { + continue; + } + + if (_name_to_id.contains(filter_name)) { + // Nested filter policy + const auto& filter_policy = _policys[_name_to_id[filter_name]]; + auto type_it = filter_policy.properties.find(PROP_TYPE); + if (type_it == filter_policy.properties.end()) { + throw Exception( + ErrorCode::INVALID_ARGUMENT, + "Invalid " + error_prefix + " configuration in policy: " + filter_name); + } + + segment_v2::inverted_index::Settings settings; + for (const auto& prop : filter_policy.properties) { + if (prop.first != PROP_TYPE) { + settings.set(prop.first, prop.second); } - builder.add_token_filter_config(type_it->second, settings); - } else { - // Simple token filter - builder.add_token_filter_config(filter_name, {}); } + add_config_func(type_it->second, settings); + } else { + // Simple filter + add_config_func(filter_name, {}); } } - - auto custom_analyzer_config = builder.build(); - return segment_v2::inverted_index::CustomAnalyzer::build_custom_analyzer( - custom_analyzer_config); } } // namespace doris \ No newline at end of file diff --git a/be/src/runtime/index_policy/index_policy_mgr.h b/be/src/runtime/index_policy/index_policy_mgr.h index aa0b25a0448bf7..707270930fe4cb 100644 --- a/be/src/runtime/index_policy/index_policy_mgr.h +++ b/be/src/runtime/index_policy/index_policy_mgr.h @@ -40,9 +40,16 @@ class IndexPolicyMgr { private: constexpr static auto PROP_TOKENIZER = "tokenizer"; + constexpr static auto PROP_CHAR_FILTER = "char_filter"; constexpr static auto PROP_TOKEN_FILTER = "token_filter"; constexpr static auto PROP_TYPE = "type"; + void process_filter_configs( + const TIndexPolicy& index_policy_analyzer, const std::string& prop_name, + const std::string& error_prefix, + std::function + add_config_func); + std::shared_mutex _mutex; Policys _policys; diff --git a/be/src/vec/functions/function_tokenize.cpp b/be/src/vec/functions/function_tokenize.cpp index 
151c0bb1e0b81c..02e597169ef636 100644 --- a/be/src/vec/functions/function_tokenize.cpp +++ b/be/src/vec/functions/function_tokenize.cpp @@ -105,8 +105,8 @@ void FunctionTokenize::_do_tokenize(const ColumnString& src_column_string, auto reader = InvertedIndexAnalyzer::create_reader(inverted_index_ctx.char_filter_map); reader->init(tokenize_str.data, (int)tokenize_str.size, true); - auto analyzer_tokens = InvertedIndexAnalyzer::get_analyse_result( - reader.get(), inverted_index_ctx.analyzer); + auto analyzer_tokens = + InvertedIndexAnalyzer::get_analyse_result(reader, inverted_index_ctx.analyzer); rapidjson::Document doc; doc.SetArray(); diff --git a/be/src/vec/functions/match.cpp b/be/src/vec/functions/match.cpp index 3875a73a0c5f66..33c8e34eb3b565 100644 --- a/be/src/vec/functions/match.cpp +++ b/be/src/vec/functions/match.cpp @@ -188,7 +188,7 @@ std::vector FunctionMatchBase::analyse_query_str_token( inverted_index_ctx->char_filter_map); reader->init(match_query_str.data(), (int)match_query_str.size(), true); query_tokens = doris::segment_v2::inverted_index::InvertedIndexAnalyzer::get_analyse_result( - reader.get(), inverted_index_ctx->analyzer); + reader, inverted_index_ctx->analyzer); return query_tokens; } @@ -211,7 +211,7 @@ inline std::vector FunctionMatchBase::analyse_data_token( data_tokens = doris::segment_v2::inverted_index::InvertedIndexAnalyzer::get_analyse_result( - reader.get(), inverted_index_ctx->analyzer); + reader, inverted_index_ctx->analyzer); } } else { const auto& str_ref = string_col->get_data_at(current_block_row_idx); @@ -224,7 +224,7 @@ inline std::vector FunctionMatchBase::analyse_data_token( reader->init(str_ref.data, (int)str_ref.size, true); data_tokens = doris::segment_v2::inverted_index::InvertedIndexAnalyzer::get_analyse_result( - reader.get(), inverted_index_ctx->analyzer); + reader, inverted_index_ctx->analyzer); } } return data_tokens; diff --git a/be/test/olap/rowset/segment_v2/inverted_index/analyzer/icu_analyzer_test.cpp 
b/be/test/olap/rowset/segment_v2/inverted_index/analyzer/icu_analyzer_test.cpp index 4cd6d180a2e911..eae5e59ffb2e62 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/analyzer/icu_analyzer_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/analyzer/icu_analyzer_test.cpp @@ -35,11 +35,11 @@ class ICUTokenizerTest : public ::testing::Test { analyzer.initDict("./be/dict/icu"); analyzer.set_lowercase(false); - lucene::util::SStringReader reader; - reader.init(s.data(), s.size(), false); + auto reader = std::make_shared>(); + reader->init(s.data(), s.size(), false); - std::unique_ptr tokenizer; - tokenizer.reset((ICUTokenizer*)analyzer.tokenStream(L"", &reader)); + std::unique_ptr tokenizer; + tokenizer.reset((inverted_index::ICUTokenizer*)analyzer.tokenStream(L"", reader)); Token t; while (tokenizer->next(&t)) { @@ -572,4 +572,55 @@ TEST_F(ICUTokenizerTest, TestICUScriptExtensions) { } } +TEST_F(ICUTokenizerTest, TestICUAnalyzerCreateComponentsWithLowercase) { + std::vector datas; + + ICUAnalyzer analyzer; + analyzer.initDict("./be/dict/icu"); + analyzer.set_lowercase(true); + + std::string text = "Mixed Case TEXT with Numbers 123."; + auto reader = std::make_shared>(); + reader->init(text.data(), text.size(), false); + + auto token_stream = analyzer.tokenStream(L"", reader); + ASSERT_NE(token_stream, nullptr); + + Token t; + while (token_stream->next(&t)) { + std::string term(t.termBuffer(), t.termLength()); + datas.emplace_back(term); + } + + std::vector expected = {"mixed", "case", "text", "with", "numbers", "123"}; + ASSERT_EQ(datas.size(), expected.size()); + for (size_t i = 0; i < datas.size(); i++) { + ASSERT_EQ(datas[i], expected[i]); + } + + delete token_stream; +} + +TEST_F(ICUTokenizerTest, TestICUAnalyzerTokenStreamThrowsException) { + ICUAnalyzer analyzer; + analyzer.initDict("./be/dict/icu"); + + std::string text = "Hello World!"; + auto reader = std::make_shared>(); + reader->init(text.data(), text.size(), false); + + EXPECT_THROW({ 
analyzer.tokenStream(L"", reader.get()); }, Exception); +} + +TEST_F(ICUTokenizerTest, TestICUAnalyzerReusableTokenStreamThrowsException) { + ICUAnalyzer analyzer; + analyzer.initDict("./be/dict/icu"); + + std::string text = "Hello World!"; + auto reader = std::make_shared>(); + reader->init(text.data(), text.size(), false); + + EXPECT_THROW({ analyzer.reusableTokenStream(L"", reader.get()); }, Exception); +} + } // namespace doris::segment_v2 diff --git a/be/test/olap/rowset/segment_v2/inverted_index/analyzer/simple_analyzer_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/analyzer/simple_analyzer_test.cpp index 6dba8233a2ed91..6b36abcd56db72 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/analyzer/simple_analyzer_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/analyzer/simple_analyzer_test.cpp @@ -31,11 +31,11 @@ std::vector tokenize(const std::string& s, bool lowercase = false) BasicAnalyzer analyzer; analyzer.set_lowercase(lowercase); - lucene::util::SStringReader reader; - reader.init(s.data(), s.size(), false); + auto reader = std::make_shared>(); + reader->init(s.data(), s.size(), false); - std::unique_ptr tokenizer; - tokenizer.reset((BasicTokenizer*)analyzer.tokenStream(L"", &reader)); + std::unique_ptr tokenizer; + tokenizer.reset((inverted_index::BasicTokenizer*)analyzer.tokenStream(L"", reader)); Token t; while (tokenizer->next(&t)) { diff --git a/be/test/olap/rowset/segment_v2/inverted_index/ananlyzer/custom_analyzer_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/ananlyzer/custom_analyzer_test.cpp index f71a48ee67c6cf..0f18b4d5e600b4 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/ananlyzer/custom_analyzer_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/ananlyzer/custom_analyzer_test.cpp @@ -94,12 +94,12 @@ class CustomAnalyzerTest : public ::testing::Test { }; int32_t tokenize(const CustomAnalyzerPtr& custom_analyzer, const std::vector& lines) { - lucene::util::SStringReader reader; + 
auto reader = std::make_shared>(); size_t total_count = 0; Token t; for (size_t i = 0; i < lines.size(); ++i) { - reader.init(lines[i].data(), lines[i].size(), false); - auto* token_stream = custom_analyzer->reusableTokenStream(L"", &reader); + reader->init(lines[i].data(), lines[i].size(), false); + auto* token_stream = custom_analyzer->reusableTokenStream(L"", reader); token_stream->reset(); while (token_stream->next(&t)) { total_count++; @@ -120,9 +120,9 @@ struct ExpectedToken { std::vector tokenize1(const CustomAnalyzerPtr& custom_analyzer, const std::string line) { std::vector results; - lucene::util::SStringReader reader; - reader.init(line.data(), line.size(), false); - auto* token_stream = custom_analyzer->reusableTokenStream(L"", &reader); + auto reader = std::make_shared>(); + reader->init(line.data(), line.size(), false); + auto* token_stream = custom_analyzer->reusableTokenStream(L"", reader); token_stream->reset(); Token t; while (token_stream->next(&t)) { @@ -196,9 +196,69 @@ TEST_F(CustomAnalyzerTest, CustomNgramAnalyzer) { } } +TEST_F(CustomAnalyzerTest, TokenStreamNotSupported) { + CustomAnalyzerConfig::Builder builder; + builder.with_tokenizer_config("standard", {}); + auto custom_analyzer_config = builder.build(); + auto custom_analyzer = CustomAnalyzer::build_custom_analyzer(custom_analyzer_config); + + auto reader = std::make_shared>(); + reader->init("test content", 12, false); + + EXPECT_THROW({ custom_analyzer->tokenStream(L"field", reader.get()); }, Exception); + + EXPECT_THROW({ custom_analyzer->reusableTokenStream(L"field", reader.get()); }, Exception); +} + +TEST_F(CustomAnalyzerTest, ReusableTokenStreamNotSupported) { + CustomAnalyzerConfig::Builder builder; + builder.with_tokenizer_config("standard", {}); + auto custom_analyzer_config = builder.build(); + auto custom_analyzer = CustomAnalyzer::build_custom_analyzer(custom_analyzer_config); + + auto reader = std::make_shared>(); + reader->init("test content", 12, false); + + 
EXPECT_THROW({ custom_analyzer->reusableTokenStream(L"field", reader.get()); }, Exception); + + try { + custom_analyzer->reusableTokenStream(L"field", reader.get()); + FAIL() << "Expected Exception to be thrown"; + } catch (const Exception& e) { + EXPECT_EQ(e.code(), ErrorCode::INVERTED_INDEX_NOT_SUPPORTED); + EXPECT_STREQ(e.what(), "[E-6001] CustomAnalyzer::reusableTokenStream not supported"); + } +} + +TEST_F(CustomAnalyzerTest, TokenStreamWithReaderPtr) { + CustomAnalyzerConfig::Builder builder; + builder.with_tokenizer_config("standard", {}); + builder.add_token_filter_config("lowercase", {}); + auto custom_analyzer_config = builder.build(); + auto custom_analyzer = CustomAnalyzer::build_custom_analyzer(custom_analyzer_config); + + auto reader = std::make_shared>(); + reader->init("Hello World Test", 16, false); + + auto* token_stream = custom_analyzer->tokenStream(L"field", reader); + EXPECT_NE(token_stream, nullptr); + + Token t; + std::vector tokens; + token_stream->reset(); + while (token_stream->next(&t)) { + tokens.emplace_back(std::string(t.termBuffer(), t.termLength())); + } + + std::vector expected = {"hello", "world", "test"}; + EXPECT_EQ(tokens, expected); + + delete token_stream; +} + // TEST_F(CustomAnalyzerTest, test) { // std::string name = "name"; -// std::string path = "/mnt/disk2/yangsiyu/clucene/index"; +// std::string path = "/mnt/disk3/yangsiyu/clucene"; // std::vector lines; @@ -213,145 +273,157 @@ TEST_F(CustomAnalyzerTest, CustomNgramAnalyzer) { // std::cout << "lines size: " << lines.size() << std::endl; +// Settings char_replace_params; +// char_replace_params.set("char_filter_pattern", "_"); +// char_replace_params.set("char_filter_replacement", " "); + // Settings word_delimiter_params; // word_delimiter_params.set("preserve_original", "true"); // CustomAnalyzerConfig::Builder builder; // builder.with_tokenizer_config("standard", {}); -// builder.add_token_filter_config("word_delimiter", word_delimiter_params); +// 
builder.add_char_filter_config("char_replace", char_replace_params); +// // builder.add_token_filter_config("word_delimiter", word_delimiter_params); // // builder.add_token_filter_config("asciifolding", {}); -// // builder.add_token_filter_config("lowercase", {}); +// builder.add_token_filter_config("lowercase", {}); // auto custom_analyzer_config = builder.build(); // auto custom_analyzer = CustomAnalyzer::build_custom_analyzer(custom_analyzer_config); -// { -// TimeGuard t("load time"); +// auto result = tokenize1(custom_analyzer, lines[0]); +// for (const auto& token : result) { +// std::cout << token.term << " " << token.pos << std::endl; +// } -// lucene::index::IndexWriter indexwriter(path.c_str(), custom_analyzer.get(), true); -// indexwriter.setRAMBufferSizeMB(512); -// indexwriter.setMaxFieldLength(0x7FFFFFFFL); -// indexwriter.setMergeFactor(1000000000); -// indexwriter.setUseCompoundFile(false); +// // { +// // TimeGuard t("load time"); -// lucene::util::SStringReader reader; +// // lucene::index::IndexWriter indexwriter(path.c_str(), custom_analyzer.get(), true); +// // indexwriter.setRAMBufferSizeMB(512); +// // indexwriter.setMaxFieldLength(0x7FFFFFFFL); +// // indexwriter.setMergeFactor(1000000000); +// // indexwriter.setUseCompoundFile(false); -// lucene::document::Document doc; -// int32_t field_config = lucene::document::Field::STORE_NO; -// field_config |= lucene::document::Field::INDEX_NONORMS; -// field_config |= lucene::document::Field::INDEX_TOKENIZED; -// auto field_name = std::wstring(name.begin(), name.end()); -// auto* field = _CLNEW lucene::document::Field(field_name.c_str(), field_config); -// field->setOmitTermFreqAndPositions(false); -// doc.add(*field); +// // auto reader = std::make_shared>(); -// for (int32_t j = 0; j < 1; j++) { -// for (size_t k = 0; k < lines.size(); k++) { -// reader.init(lines[k].data(), lines[k].size(), false); -// auto* stream = custom_analyzer->reusableTokenStream(field->name(), &reader); -// 
field->setValue(stream); +// // lucene::document::Document doc; +// // int32_t field_config = lucene::document::Field::STORE_NO; +// // field_config |= lucene::document::Field::INDEX_NONORMS; +// // field_config |= lucene::document::Field::INDEX_TOKENIZED; +// // auto field_name = std::wstring(name.begin(), name.end()); +// // auto* field = _CLNEW lucene::document::Field(field_name.c_str(), field_config); +// // field->setOmitTermFreqAndPositions(false); +// // doc.add(*field); -// indexwriter.addDocument(&doc); -// } -// } +// // for (int32_t j = 0; j < 1; j++) { +// // for (size_t k = 0; k < lines.size(); k++) { +// // reader->init(lines[k].data(), lines[k].size(), false); +// // auto* stream = custom_analyzer->reusableTokenStream(field->name(), reader); +// // field->setValue(stream); -// std::cout << "---------------------" << std::endl; +// // indexwriter.addDocument(&doc); +// // } +// // } -// indexwriter.close(); -// } +// // std::cout << "---------------------" << std::endl; -// std::cout << "-----------" << std::endl; - -// try { -// { -// auto* dir = FSDirectory::getDirectory(path.c_str()); -// auto* reader = IndexReader::open(dir, 1024 * 1024, true); -// auto searcher = std::make_shared(reader); - -// // std::cout << "macDoc: " << reader->maxDoc() << std::endl; - -// { -// TimeGuard time("query time"); - -// { -// TQueryOptions query_options; -// doris::segment_v2::PhraseQuery query(searcher, query_options, nullptr); - -// InvertedIndexQueryInfo query_info; -// query_info.field_name = L"name"; -// { -// doris::segment_v2::TermInfo t; -// t.term = "Super_Duper"; -// t.position = 1; -// query_info.term_infos.emplace_back(std::move(t)); -// } -// { -// doris::segment_v2::TermInfo t; -// t.term = "Super"; -// t.position = 1; -// query_info.term_infos.emplace_back(std::move(t)); -// } -// { -// doris::segment_v2::TermInfo t; -// t.term = "Duper"; -// t.position = 2; -// query_info.term_infos.emplace_back(std::move(t)); -// } -// { -// 
doris::segment_v2::TermInfo t; -// t.term = "c"; -// t.position = 3; -// query_info.term_infos.emplace_back(std::move(t)); -// } -// query_info.slop = 1; -// query_info.ordered = true; -// query.add(query_info); - -// roaring::Roaring result; -// query.search(result); - -// std::cout << "phrase_query count: " << result.cardinality() << std::endl; -// } -// // { -// // TQueryOptions query_options; -// // doris::segment_v2::PhrasePrefixQuery query(searcher, query_options, nullptr); - -// // InvertedIndexQueryInfo query_info; -// // query_info.field_name = L"name"; -// // { -// // doris::segment_v2::TermInfo t; -// // t.term = "Super_Duper"; -// // t.position = 1; -// // query_info.term_infos.emplace_back(std::move(t)); -// // } -// // { -// // doris::segment_v2::TermInfo t; -// // t.term = "Super"; -// // t.position = 1; -// // query_info.term_infos.emplace_back(std::move(t)); -// // } -// // { -// // doris::segment_v2::TermInfo t; -// // t.term = "Dup"; -// // t.position = 2; -// // query_info.term_infos.emplace_back(std::move(t)); -// // } -// // query.add(query_info); - -// // roaring::Roaring result; -// // query.search(result); - -// // std::cout << "phrase_prefix_query count: " << result.cardinality() << std::endl; -// // } -// } - -// reader->close(); -// _CLLDELETE(reader); -// _CLDECDELETE(dir); -// } -// } catch (const CLuceneError& e) { -// std::cout << e.number() << ": " << e.what() << std::endl; -// } +// // indexwriter.close(); +// // } + +// // std::cout << "-----------" << std::endl; + +// // try { +// // { +// // auto* dir = FSDirectory::getDirectory(path.c_str()); +// // auto* reader = IndexReader::open(dir, 1024 * 1024, true); +// // auto searcher = std::make_shared(reader); + +// // // std::cout << "macDoc: " << reader->maxDoc() << std::endl; + +// // { +// // TimeGuard time("query time"); + +// // { +// // IndexQueryContextPtr context = std::make_shared(); + +// // TQueryOptions query_options; +// // doris::segment_v2::PhraseQuery query(searcher, 
context); + +// // InvertedIndexQueryInfo query_info; +// // query_info.field_name = L"name"; +// // { +// // doris::segment_v2::TermInfo t; +// // t.term = "Super_Duper"; +// // t.position = 1; +// // query_info.term_infos.emplace_back(std::move(t)); +// // } +// // { +// // doris::segment_v2::TermInfo t; +// // t.term = "Super"; +// // t.position = 1; +// // query_info.term_infos.emplace_back(std::move(t)); +// // } +// // { +// // doris::segment_v2::TermInfo t; +// // t.term = "Duper"; +// // t.position = 2; +// // query_info.term_infos.emplace_back(std::move(t)); +// // } +// // { +// // doris::segment_v2::TermInfo t; +// // t.term = "c"; +// // t.position = 3; +// // query_info.term_infos.emplace_back(std::move(t)); +// // } +// // query_info.slop = 1; +// // query_info.ordered = true; +// // query.add(query_info); + +// // roaring::Roaring result; +// // query.search(result); + +// // std::cout << "phrase_query count: " << result.cardinality() << std::endl; +// // } +// // // { +// // // TQueryOptions query_options; +// // // doris::segment_v2::PhrasePrefixQuery query(searcher, query_options, nullptr); + +// // // InvertedIndexQueryInfo query_info; +// // // query_info.field_name = L"name"; +// // // { +// // // doris::segment_v2::TermInfo t; +// // // t.term = "Super_Duper"; +// // // t.position = 1; +// // // query_info.term_infos.emplace_back(std::move(t)); +// // // } +// // // { +// // // doris::segment_v2::TermInfo t; +// // // t.term = "Super"; +// // // t.position = 1; +// // // query_info.term_infos.emplace_back(std::move(t)); +// // // } +// // // { +// // // doris::segment_v2::TermInfo t; +// // // t.term = "Dup"; +// // // t.position = 2; +// // // query_info.term_infos.emplace_back(std::move(t)); +// // // } +// // // query.add(query_info); + +// // // roaring::Roaring result; +// // // query.search(result); + +// // // std::cout << "phrase_prefix_query count: " << result.cardinality() << std::endl; +// // // } +// // } + +// // reader->close(); 
+// // _CLLDELETE(reader); +// // _CLDECDELETE(dir); +// // } +// // } catch (const CLuceneError& e) { +// // std::cout << e.number() << ": " << e.what() << std::endl; +// // } // } } // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/test/olap/rowset/segment_v2/inverted_index/char_filter/char_filter_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/char_filter/char_filter_test.cpp new file mode 100644 index 00000000000000..020f0db8d11199 --- /dev/null +++ b/be/test/olap/rowset/segment_v2/inverted_index/char_filter/char_filter_test.cpp @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "olap/rowset/segment_v2/inverted_index/char_filter/char_filter.h" + +#include + +#include + +#include "olap/rowset/segment_v2/inverted_index/util/reader.h" + +using namespace lucene::analysis; + +namespace doris::segment_v2::inverted_index { + +class MockDorisCharFilter : public DorisCharFilter { +public: + MockDorisCharFilter(ReaderPtr reader) : DorisCharFilter(std::move(reader)) {} + ~MockDorisCharFilter() override = default; + + void initialize() override {} + + // 实现必需的虚函数 + void init(const void* _value, int32_t _length, bool copyData) override { + if (_reader) { + _reader->init(_value, _length, copyData); + } + } + + int32_t read(const void** start, int32_t min, int32_t max) override { + if (_reader) { + return _reader->read(start, min, max); + } + return -1; + } + + int32_t readCopy(void* start, int32_t off, int32_t len) override { + if (_reader) { + return _reader->readCopy(start, off, len); + } + return -1; + } +}; + +class DorisCharFilterTest : public ::testing::Test { +protected: + void SetUp() override { _mock_reader = std::make_shared>(); } + + ReaderPtr _mock_reader; +}; + +TEST_F(DorisCharFilterTest, ExceptionThrowing) { + auto filter = std::make_shared(_mock_reader); + + EXPECT_THROW(filter->position(), doris::Exception); + + EXPECT_THROW(filter->skip(10), doris::Exception); + + EXPECT_THROW(filter->size(), doris::Exception); +} + +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/test/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter_factory_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter_factory_test.cpp new file mode 100644 index 00000000000000..28bcc33f9ca3ea --- /dev/null +++ b/be/test/olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter_factory_test.cpp @@ -0,0 +1,193 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset/segment_v2/inverted_index/char_filter/char_replace_char_filter_factory.h" + +#include + +namespace doris::segment_v2::inverted_index { + +ReaderPtr create_char_replace_filter(const std::string& text, const std::string& pattern, + const std::string& replacement = " ") { + ReaderPtr reader = std::make_shared>(); + reader->init(text.data(), text.size(), false); + + Settings settings; + settings.set(CHAR_REPLACE_PATTERN, pattern); + settings.set(CHAR_REPLACE_REPLACEMENT, replacement); + + CharReplaceCharFilterFactory factory; + factory.initialize(settings); + auto char_filter = factory.create(reader); + return char_filter; +} + +struct ExpectedOutput { + std::string text; + std::string expected; +}; + +class CharReplaceCharFilterFactoryTest : public ::testing::Test { +protected: + void assert_char_filter_output(const std::string& input_text, const std::string& pattern, + const std::string& expected_output, + const std::string& replacement = " ") { + auto char_filter = create_char_replace_filter(input_text, pattern, replacement); + + const void* data = nullptr; + int32_t read_len = char_filter->read(&data, 0, char_filter->size()); + ASSERT_GT(read_len, 0) << "Failed to read from char filter"; + + std::string result(static_cast(data), read_len); + 
EXPECT_EQ(result, expected_output) << "Char filter output mismatch"; + } +}; + +TEST_F(CharReplaceCharFilterFactoryTest, BasicReplacement) { + assert_char_filter_output("hello,world", ",", "hello world"); +} + +TEST_F(CharReplaceCharFilterFactoryTest, MultipleReplacements) { + assert_char_filter_output("a,b,c,d", ",", "a b c d"); +} + +TEST_F(CharReplaceCharFilterFactoryTest, CustomReplacement) { + assert_char_filter_output("hello,world", ",", "hello_world", "_"); +} + +TEST_F(CharReplaceCharFilterFactoryTest, MultiplePatternChars) { + assert_char_filter_output("a,b;c:d", ",;:", "a b c d"); +} + +TEST_F(CharReplaceCharFilterFactoryTest, NoMatch) { + assert_char_filter_output("hello world", "x", "hello world"); +} + +TEST_F(CharReplaceCharFilterFactoryTest, EmptyInput) { + auto char_filter = create_char_replace_filter("", ","); + + const void* data = nullptr; + int32_t read_len = char_filter->read(&data, 0, char_filter->size()); + + // For empty input, read should return -1 (EOF) + EXPECT_EQ(read_len, -1) << "Empty input should return EOF"; + EXPECT_TRUE(data == nullptr || char_filter->size() == 0) + << "No data should be available for empty input"; +} + +TEST_F(CharReplaceCharFilterFactoryTest, EmptyPattern) { + try { + assert_char_filter_output("hello,world", "", "hello,world"); + } catch (const Exception& e) { + EXPECT_EQ(e.code(), ErrorCode::INVALID_ARGUMENT); + } +} + +TEST_F(CharReplaceCharFilterFactoryTest, AllCharsMatch) { + assert_char_filter_output("abc", "abc", " "); +} + +TEST_F(CharReplaceCharFilterFactoryTest, ChineseCharacters) { + assert_char_filter_output("你好,世界", ",", "你好 世界"); +} + +TEST_F(CharReplaceCharFilterFactoryTest, SpecialCharacters) { + assert_char_filter_output("test@example.com", "@.", "test example com"); +} + +TEST_F(CharReplaceCharFilterFactoryTest, FactoryInitialization) { + Settings settings; + settings.set(CHAR_REPLACE_PATTERN, ","); + settings.set(CHAR_REPLACE_REPLACEMENT, " "); + + CharReplaceCharFilterFactory factory; + 
EXPECT_NO_THROW(factory.initialize(settings)); +} + +TEST_F(CharReplaceCharFilterFactoryTest, FactoryInitializationMissingPattern) { + Settings settings; + // Missing pattern - should throw exception + settings.set(CHAR_REPLACE_REPLACEMENT, " "); + + CharReplaceCharFilterFactory factory; + EXPECT_NO_THROW(factory.initialize(settings)); +} + +TEST_F(CharReplaceCharFilterFactoryTest, FactoryInitializationEmptyPattern) { + Settings settings; + settings.set(CHAR_REPLACE_PATTERN, ""); + settings.set(CHAR_REPLACE_REPLACEMENT, " "); + + CharReplaceCharFilterFactory factory; + EXPECT_THROW(factory.initialize(settings), Exception); +} + +TEST_F(CharReplaceCharFilterFactoryTest, FactoryCreateFilter) { + Settings settings; + settings.set(CHAR_REPLACE_PATTERN, ","); + settings.set(CHAR_REPLACE_REPLACEMENT, " "); + + CharReplaceCharFilterFactory factory; + factory.initialize(settings); + + ReaderPtr input_reader = std::make_shared>(); + input_reader->init("test,data", 9, false); + + auto char_filter = factory.create(input_reader); + ASSERT_NE(char_filter, nullptr); + + const void* data = nullptr; + int32_t read_len = char_filter->read(&data, 0, char_filter->size()); + ASSERT_GT(read_len, 0); + + std::string result(static_cast(data), read_len); + EXPECT_EQ(result, "test data"); +} + +TEST_F(CharReplaceCharFilterFactoryTest, DefaultReplacement) { + Settings settings; + settings.set(CHAR_REPLACE_PATTERN, ","); + // No replacement specified - should use default " " + + CharReplaceCharFilterFactory factory; + factory.initialize(settings); + + ReaderPtr input_reader = std::make_shared>(); + input_reader->init("a,b,c", 5, false); + + auto char_filter = factory.create(input_reader); + + const void* data = nullptr; + int32_t read_len = char_filter->read(&data, 0, char_filter->size()); + ASSERT_GT(read_len, 0); + + std::string result(static_cast(data), read_len); + EXPECT_EQ(result, "a b c"); +} + +TEST_F(CharReplaceCharFilterFactoryTest, EdgeCases) { + // Test with whitespace only + 
assert_char_filter_output(" ", ",", " "); + + // Test with only pattern characters + assert_char_filter_output(",,,", ",", " "); + + // Test with mixed content + assert_char_filter_output("a,,b,", ",", "a b "); +} + +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query_test.cpp index 62c04519ee1d0c..5e0e783ca8e4f1 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query_test.cpp @@ -89,7 +89,7 @@ class BooleanQueryTest : public testing::Test { indexwriter->setMergeFactor(1000000000); indexwriter->setUseCompoundFile(false); - auto* char_string_reader = _CLNEW lucene::util::SStringReader; + auto char_string_reader = std::make_shared>(); auto* doc = _CLNEW lucene::document::Document(); int32_t field_config = lucene::document::Field::STORE_NO; @@ -114,7 +114,6 @@ class BooleanQueryTest : public testing::Test { _CLLDELETE(indexwriter); _CLLDELETE(doc); - _CLLDELETE(char_string_reader); } }; diff --git a/be/test/olap/rowset/segment_v2/inverted_index/setting_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/setting_test.cpp index 84dd8d11a42841..e616ae0362040a 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/setting_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/setting_test.cpp @@ -111,21 +111,19 @@ TEST_F(SettingsTest, GetStringReturnsCorrectValues) { TEST_F(SettingsTest, GetEntryListReturnsCorrectValues) { Settings settings(testMap); - auto emptyList = settings.get_entry_list("list_empty"); - EXPECT_TRUE(emptyList.empty()); + // auto emptyList = settings.get_entry_list("list_empty"); + // EXPECT_TRUE(emptyList.empty()); - auto singleList = settings.get_entry_list("list_single"); - ASSERT_EQ(singleList.size(), 1); - EXPECT_EQ(singleList[0], "item1"); 
+ // auto singleList = settings.get_entry_list("list_single"); + // ASSERT_EQ(singleList.size(), 1); + // EXPECT_EQ(singleList[0], "item1"); auto multiList = settings.get_entry_list("list_multiple"); - ASSERT_EQ(multiList.size(), 3); - EXPECT_EQ(multiList[0], "item1"); - EXPECT_EQ(multiList[1], "item2"); - EXPECT_EQ(multiList[2], "item3"); + ASSERT_EQ(multiList.size(), 1); + EXPECT_EQ(multiList[0], "item1][item2][item3"); - auto nonExistent = settings.get_entry_list("non_existent"); - EXPECT_TRUE(nonExistent.empty()); + // auto nonExistent = settings.get_entry_list("non_existent"); + // EXPECT_TRUE(nonExistent.empty()); } TEST_F(SettingsTest, GetWordSetReturnsCorrectValues) { @@ -181,4 +179,79 @@ TEST_F(SettingsTest, MoveConstructorWorks) { EXPECT_TRUE(settings1.empty()); } +TEST_F(SettingsTest, GetEntryListWithBracketsInside) { + Settings settings; + + settings.set("list_with_brackets_inside", "[item[with]brackets]"); + auto singleWithBrackets = settings.get_entry_list("list_with_brackets_inside"); + ASSERT_EQ(singleWithBrackets.size(), 1); + EXPECT_EQ(singleWithBrackets[0], "item[with]brackets"); + + settings.set("list_multiple_with_brackets", + "[item1[with]brackets][item2[also]has[brackets]][item3]"); + auto multiWithBrackets = settings.get_entry_list("list_multiple_with_brackets"); + ASSERT_EQ(multiWithBrackets.size(), 1); + EXPECT_EQ(multiWithBrackets[0], "item1[with]brackets][item2[also]has[brackets]][item3"); + + settings.set("list_nested_brackets", "[[[nested]]][[double]][single]"); + auto nestedBrackets = settings.get_entry_list("list_nested_brackets"); + ASSERT_EQ(nestedBrackets.size(), 1); + EXPECT_EQ(nestedBrackets[0], "[[nested]]][[double]][single"); + + settings.set("list_empty_brackets_inside", "[item[]with][empty][]brackets"); + EXPECT_THROW(settings.get_entry_list("list_empty_brackets_inside"), Exception); +} + +TEST_F(SettingsTest, GetEntryListWithCommaSeparators) { + Settings settings; + + settings.set("list_comma_separated", 
"[item1],[item2],[item3]"); + auto commaList = settings.get_entry_list("list_comma_separated"); + ASSERT_EQ(commaList.size(), 3); + EXPECT_EQ(commaList[0], "item1"); + EXPECT_EQ(commaList[1], "item2"); + EXPECT_EQ(commaList[2], "item3"); + + settings.set("list_with_empty_items", "[item1],[],[item3]"); + auto listWithEmpty = settings.get_entry_list("list_with_empty_items"); + ASSERT_EQ(listWithEmpty.size(), 2); + EXPECT_EQ(listWithEmpty[0], "item1"); + EXPECT_EQ(listWithEmpty[1], "item3"); + + settings.set("list_comma_separated1", "[item1], [item2], [item3]"); + auto commaList1 = settings.get_entry_list("list_comma_separated1"); + ASSERT_EQ(commaList1.size(), 3); + EXPECT_EQ(commaList1[0], "item1"); + EXPECT_EQ(commaList1[1], "item2"); + EXPECT_EQ(commaList1[2], "item3"); + + settings.set("list_with_empty_items1", ""); + auto listWithEmpty1 = settings.get_entry_list("list_with_empty_items1"); + ASSERT_EQ(listWithEmpty1.size(), 0); +} + +TEST_F(SettingsTest, GetEntryListSpecExamples) { + Settings settings; + + settings.set("ex_empty", "[]"); + auto v0 = settings.get_entry_list("ex_empty"); + EXPECT_TRUE(v0.empty()); + + settings.set("ex_nested_balanced", "[[123]]"); + auto v1 = settings.get_entry_list("ex_nested_balanced"); + ASSERT_EQ(v1.size(), 1); + EXPECT_EQ(v1[0], "[123]"); + + settings.set("ex_nested_unbalanced_inside", "[[123[]"); + auto v2 = settings.get_entry_list("ex_nested_unbalanced_inside"); + ASSERT_EQ(v2.size(), 1); + EXPECT_EQ(v2[0], "[123["); + + settings.set("ex_no_comma_multiple", "[123][123"); + EXPECT_THROW(settings.get_entry_list("ex_no_comma_multiple"), Exception); + + settings.set("ex_comma_missing_closing", "[123],[123"); + EXPECT_THROW(settings.get_entry_list("ex_comma_missing_closing"), Exception); +} + } // namespace doris::segment_v2::inverted_index diff --git a/be/test/olap/rowset/segment_v2/inverted_index/token_filter/ascii_folding_filter_factory_test.cpp 
b/be/test/olap/rowset/segment_v2/inverted_index/token_filter/ascii_folding_filter_factory_test.cpp index 8187a8bf64cc60..ce659971367ffc 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/token_filter/ascii_folding_filter_factory_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/token_filter/ascii_folding_filter_factory_test.cpp @@ -24,14 +24,14 @@ namespace doris::segment_v2::inverted_index { TokenStreamPtr create_filter(const std::string& text, Settings token_filter_settings) { - static lucene::util::SStringReader reader; - reader.init(text.data(), text.size(), false); + ReaderPtr reader = std::make_shared>(); + reader->init(text.data(), text.size(), false); Settings settings; KeywordTokenizerFactory tokenizer_factory; tokenizer_factory.initialize(settings); auto tokenizer = tokenizer_factory.create(); - tokenizer->set_reader(&reader); + tokenizer->set_reader(reader); ASCIIFoldingFilterFactory token_filter_factory; token_filter_factory.initialize(token_filter_settings); diff --git a/be/test/olap/rowset/segment_v2/inverted_index/token_filter/lower_case_filter_factory_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/token_filter/lower_case_filter_factory_test.cpp index 588dc0381a1fbf..010c3b053b46d4 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/token_filter/lower_case_filter_factory_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/token_filter/lower_case_filter_factory_test.cpp @@ -25,13 +25,13 @@ namespace doris::segment_v2::inverted_index { TokenStreamPtr create_lowercase_filter(const std::string& text, Settings settings = Settings()) { - static lucene::util::SStringReader reader; - reader.init(text.data(), text.size(), false); + ReaderPtr reader = std::make_shared>(); + reader->init(text.data(), text.size(), false); KeywordTokenizerFactory tokenizer_factory; tokenizer_factory.initialize(Settings()); auto tokenizer = tokenizer_factory.create(); - tokenizer->set_reader(&reader); + tokenizer->set_reader(reader); 
LowerCaseFilterFactory filter_factory; filter_factory.initialize(settings); diff --git a/be/test/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter_test.cpp index 4d8fa8545daaab..fbbecb8c021f3d 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter_test.cpp @@ -31,14 +31,14 @@ namespace doris::segment_v2::inverted_index { TokenStreamPtr create_filter(const std::string& text, int32_t flags, const std::unordered_set& prot_words = {}) { - static lucene::util::SStringReader reader; - reader.init(text.data(), text.size(), false); + ReaderPtr reader = std::make_shared>(); + reader->init(text.data(), text.size(), false); Settings settings; KeywordTokenizerFactory tokenizer_factory; tokenizer_factory.initialize(settings); auto tokenizer = tokenizer_factory.create(); - tokenizer->set_reader(&reader); + tokenizer->set_reader(reader); auto token_filter = std::make_shared( tokenizer, WordDelimiterIterator::DEFAULT_WORD_DELIM_TABLE, flags, prot_words); diff --git a/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/basic_tokenizer_factory_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/basic_tokenizer_factory_test.cpp new file mode 100644 index 00000000000000..49386133377c28 --- /dev/null +++ b/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/basic_tokenizer_factory_test.cpp @@ -0,0 +1,189 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset/segment_v2/inverted_index/tokenizer/basic/basic_tokenizer_factory.h" + +#include + +namespace doris::segment_v2::inverted_index { + +TokenStreamPtr create_basic_tokenizer(const std::string& text, Settings settings = Settings()) { + ReaderPtr reader = std::make_shared>(); + reader->init(text.data(), text.size(), false); + + BasicTokenizerFactory factory; + factory.initialize(settings); + auto tokenizer = factory.create(); + tokenizer->set_reader(reader); + tokenizer->reset(); + return tokenizer; +} + +struct ExpectedToken { + std::string term; + int pos_inc; +}; + +class BasicTokenizerFactoryTest : public ::testing::Test { +protected: + void assert_tokenizer_output(const std::string& text, + const std::vector& expected, + BasicTokenizerMode mode = BasicTokenizerMode::L1) { + Settings settings; + settings.set("mode", std::to_string(static_cast(mode))); + auto tokenizer = create_basic_tokenizer(text, settings); + + Token t; + size_t i = 0; + while (tokenizer->next(&t)) { + ASSERT_LT(i, expected.size()) << "More tokens produced than expected"; + std::string term(t.termBuffer(), t.termLength()); + EXPECT_EQ(term, expected[i].term) << "Term mismatch at index " << i; + EXPECT_EQ(t.getPositionIncrement(), expected[i].pos_inc) + << "Pos increment mismatch at index " << i; + ++i; + } + EXPECT_EQ(i, expected.size()) << "Number of tokens mismatch"; + } +}; + +TEST_F(BasicTokenizerFactoryTest, BasicTokenizationL1) { + // Test L1 mode: English + numbers + Chinese tokenization + assert_tokenizer_output("Hello world!", {{"Hello", 1}, 
{"world", 1}}, BasicTokenizerMode::L1); +} + +TEST_F(BasicTokenizerFactoryTest, ChineseTokenizationL1) { + // Test L1 mode with Chinese characters + assert_tokenizer_output("你好世界", {{"你", 1}, {"好", 1}, {"世", 1}, {"界", 1}}, + BasicTokenizerMode::L1); +} + +TEST_F(BasicTokenizerFactoryTest, MixedLanguageL1) { + // Test L1 mode with mixed English and Chinese + assert_tokenizer_output( + "Hello你好World世界", + {{"Hello", 1}, {"你", 1}, {"好", 1}, {"World", 1}, {"世", 1}, {"界", 1}}, + BasicTokenizerMode::L1); +} + +TEST_F(BasicTokenizerFactoryTest, NumbersAndPunctuationL1) { + // Test L1 mode with numbers and punctuation + assert_tokenizer_output("Version 2.0 版本", + {{"Version", 1}, {"2", 1}, {"0", 1}, {"版", 1}, {"本", 1}}, + BasicTokenizerMode::L1); +} + +TEST_F(BasicTokenizerFactoryTest, BasicTokenizationL2) { + // Test L2 mode: L1 + all Unicode characters tokenized + assert_tokenizer_output("Hello world!", {{"Hello", 1}, {"world", 1}, {"!", 1}}, + BasicTokenizerMode::L2); +} + +TEST_F(BasicTokenizerFactoryTest, UnicodeTokenizationL2) { + // Test L2 mode with various Unicode characters + assert_tokenizer_output("Hello��世界", {{"Hello", 1}, {"�", 1}, {"�", 1}, {"世", 1}, {"界", 1}}, + BasicTokenizerMode::L2); +} + +TEST_F(BasicTokenizerFactoryTest, WhitespaceHandlingL2) { + // Test L2 mode skips whitespace + assert_tokenizer_output("Hello world", {{"Hello", 1}, {"world", 1}}, BasicTokenizerMode::L2); +} + +TEST_F(BasicTokenizerFactoryTest, FactoryInitialization) { + Settings settings; + settings.set("mode", "1"); + + BasicTokenizerFactory factory; + factory.initialize(settings); + + auto tokenizer = factory.create(); + auto basic_tokenizer = std::dynamic_pointer_cast(tokenizer); + ASSERT_NE(basic_tokenizer, nullptr); +} + +TEST_F(BasicTokenizerFactoryTest, FactoryInitializationL2) { + Settings settings; + settings.set("mode", "2"); + + BasicTokenizerFactory factory; + factory.initialize(settings); + + auto tokenizer = factory.create(); + auto basic_tokenizer = 
std::dynamic_pointer_cast(tokenizer); + ASSERT_NE(basic_tokenizer, nullptr); +} + +TEST_F(BasicTokenizerFactoryTest, DefaultMode) { + // Test default mode (L1) when no mode is specified + Settings settings; + BasicTokenizerFactory factory; + factory.initialize(settings); + + auto tokenizer = factory.create(); + auto basic_tokenizer = std::dynamic_pointer_cast(tokenizer); + ASSERT_NE(basic_tokenizer, nullptr); +} + +TEST_F(BasicTokenizerFactoryTest, InvalidMode) { + Settings settings; + settings.set("mode", "3"); // Invalid mode + + BasicTokenizerFactory factory; + EXPECT_THROW(factory.initialize(settings), Exception); +} + +TEST_F(BasicTokenizerFactoryTest, InvalidModeZero) { + Settings settings; + settings.set("mode", "0"); // Invalid mode + + BasicTokenizerFactory factory; + EXPECT_THROW(factory.initialize(settings), Exception); +} + +TEST_F(BasicTokenizerFactoryTest, InvalidModeNegative) { + Settings settings; + settings.set("mode", "-1"); // Invalid mode + + BasicTokenizerFactory factory; + EXPECT_THROW(factory.initialize(settings), Exception); +} + +TEST_F(BasicTokenizerFactoryTest, EdgeCases) { + // Test empty string + assert_tokenizer_output("", {}, BasicTokenizerMode::L1); + + // Test whitespace only + assert_tokenizer_output(" ", {}, BasicTokenizerMode::L1); + + // Test punctuation only (L1 mode should skip non-Chinese punctuation) + assert_tokenizer_output("...", {}, BasicTokenizerMode::L1); + + // Test punctuation only (L2 mode should tokenize punctuation) + assert_tokenizer_output("...", {{".", 1}, {".", 1}, {".", 1}}, BasicTokenizerMode::L2); +} + +TEST_F(BasicTokenizerFactoryTest, LongText) { + // Test with longer text + std::string long_text = "This is a long text with multiple words and 中文 characters"; + std::vector expected = { + {"This", 1}, {"is", 1}, {"a", 1}, {"long", 1}, {"text", 1}, {"with", 1}, + {"multiple", 1}, {"words", 1}, {"and", 1}, {"中", 1}, {"文", 1}, {"characters", 1}}; + assert_tokenizer_output(long_text, expected, 
BasicTokenizerMode::L1); +} + +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/char_group_tokenizer_factory_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/char_group_tokenizer_factory_test.cpp index 7d2782098a1a5e..91d978f83fa330 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/char_group_tokenizer_factory_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/char_group_tokenizer_factory_test.cpp @@ -29,9 +29,9 @@ class CharGroupTokenizerTest : public ::testing::Test { std::vector tokens; auto tokenizer = factory.create(); { - lucene::util::SStringReader reader; - reader.init(text.data(), text.size(), false); - tokenizer->set_reader(&reader); + ReaderPtr reader = std::make_shared>(); + reader->init(text.data(), text.size(), false); + tokenizer->set_reader(reader); tokenizer->reset(); Token t; diff --git a/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/edge_ngram_tokenizer_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/edge_ngram_tokenizer_test.cpp index 0d9f1802952e6c..00b22b484493f2 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/edge_ngram_tokenizer_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/edge_ngram_tokenizer_test.cpp @@ -33,9 +33,9 @@ std::vector tokenize(EdgeNGramTokenizerFactory& factory, const std: std::vector tokens; auto tokenizer = factory.create(); { - lucene::util::SStringReader reader; - reader.init(text.data(), text.size(), false); - tokenizer->set_reader(&reader); + ReaderPtr reader = std::make_shared>(); + reader->init(text.data(), text.size(), false); + tokenizer->set_reader(reader); tokenizer->reset(); Token t; diff --git a/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/icu_tokenizer_factory_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/icu_tokenizer_factory_test.cpp new file mode 100644 
index 00000000000000..fb736ecaa76628 --- /dev/null +++ b/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/icu_tokenizer_factory_test.cpp @@ -0,0 +1,212 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset/segment_v2/inverted_index/tokenizer/icu/icu_tokenizer_factory.h" + +#include + +namespace doris::segment_v2::inverted_index { + +TokenStreamPtr create_icu_tokenizer(const std::string& text, Settings settings = Settings()) { + ReaderPtr reader = std::make_shared>(); + reader->init(text.data(), text.size(), false); + + ICUTokenizerFactory factory; + factory.initialize(settings); + auto tokenizer = factory.create(); + tokenizer->set_reader(reader); + tokenizer->reset(); + return tokenizer; +} + +struct ExpectedToken { + std::string term; + int pos_inc; +}; + +class ICUTokenizerFactoryTest : public ::testing::Test { +protected: + void SetUp() override { + original_dict_path_ = config::inverted_index_dict_path; + + constexpr static uint32_t MAX_PATH_LEN = 1024; + char buffer[MAX_PATH_LEN]; + EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr); + std::string current_dir = std::string(buffer); + + config::inverted_index_dict_path = current_dir + "/be/dict"; + } + + void TearDown() override { 
config::inverted_index_dict_path = original_dict_path_; } + + void assert_tokenizer_output(const std::string& text, + const std::vector& expected) { + auto tokenizer = create_icu_tokenizer(text); + + Token t; + size_t i = 0; + while (tokenizer->next(&t)) { + ASSERT_LT(i, expected.size()) << "More tokens produced than expected"; + std::string term(t.termBuffer(), t.termLength()); + EXPECT_EQ(term, expected[i].term) << "Term mismatch at index " << i; + EXPECT_EQ(t.getPositionIncrement(), expected[i].pos_inc) + << "Pos increment mismatch at index " << i; + ++i; + } + EXPECT_EQ(i, expected.size()) << "Number of tokens mismatch"; + } + +private: + std::string original_dict_path_; +}; + +TEST_F(ICUTokenizerFactoryTest, BasicEnglishTokenization) { + // Test basic English tokenization + assert_tokenizer_output("Hello world!", {{"Hello", 1}, {"world", 1}}); +} + +TEST_F(ICUTokenizerFactoryTest, EnglishWithPunctuation) { + // Test English with punctuation + assert_tokenizer_output( + "This is a test, with punctuation!", + {{"This", 1}, {"is", 1}, {"a", 1}, {"test", 1}, {"with", 1}, {"punctuation", 1}}); +} + +TEST_F(ICUTokenizerFactoryTest, EnglishWithNumbers) { + // Test English with numbers + assert_tokenizer_output( + "Version 2.0 was released in 2023", + {{"Version", 1}, {"2.0", 1}, {"was", 1}, {"released", 1}, {"in", 1}, {"2023", 1}}); +} + +TEST_F(ICUTokenizerFactoryTest, ChineseTokenization) { + // Test Chinese character tokenization + assert_tokenizer_output("你好世界", {{"你好", 1}, {"世界", 1}}); +} + +TEST_F(ICUTokenizerFactoryTest, MixedLanguage) { + // Test mixed English and Chinese + assert_tokenizer_output("Hello你好World世界", + {{"Hello", 1}, {"你好", 1}, {"World", 1}, {"世界", 1}}); +} + +TEST_F(ICUTokenizerFactoryTest, JapaneseTokenization) { + // Test Japanese tokenization (Hiragana, Katakana, Kanji) + assert_tokenizer_output("こんにちは世界", {{"こんにちは", 1}, {"世界", 1}}); +} + +TEST_F(ICUTokenizerFactoryTest, KoreanTokenization) { + // Test Korean tokenization + 
assert_tokenizer_output("안녕하세요", {{"안녕하세요", 1}}); +} + +TEST_F(ICUTokenizerFactoryTest, ArabicTokenization) { + // Test Arabic tokenization + assert_tokenizer_output("مرحبا بالعالم", {{"مرحبا", 1}, {"بالعالم", 1}}); +} + +TEST_F(ICUTokenizerFactoryTest, CyrillicTokenization) { + // Test Cyrillic (Russian) tokenization + assert_tokenizer_output("Привет мир", {{"Привет", 1}, {"мир", 1}}); +} + +TEST_F(ICUTokenizerFactoryTest, EmojiAndSymbols) { + // Test emoji and special symbols + assert_tokenizer_output("Hello 😀 world 🌍", + {{"Hello", 1}, {"😀", 1}, {"world", 1}, {"🌍", 1}}); +} + +TEST_F(ICUTokenizerFactoryTest, WhitespaceHandling) { + // Test whitespace handling + assert_tokenizer_output("Hello world\t\n", {{"Hello", 1}, {"world", 1}}); +} + +TEST_F(ICUTokenizerFactoryTest, FactoryInitialization) { + Settings settings; + ICUTokenizerFactory factory; + factory.initialize(settings); + + auto tokenizer = factory.create(); + auto icu_tokenizer = std::dynamic_pointer_cast(tokenizer); + ASSERT_NE(icu_tokenizer, nullptr); +} + +TEST_F(ICUTokenizerFactoryTest, FactoryCreateMultipleInstances) { + ICUTokenizerFactory factory; + factory.initialize(Settings {}); + + auto tokenizer1 = factory.create(); + auto tokenizer2 = factory.create(); + + ASSERT_NE(tokenizer1, tokenizer2); + ASSERT_NE(tokenizer1, nullptr); + ASSERT_NE(tokenizer2, nullptr); +} + +TEST_F(ICUTokenizerFactoryTest, EdgeCases) { + // Test empty string + assert_tokenizer_output("", {}); + + // Test whitespace only + assert_tokenizer_output(" \t\n", {}); + + // Test single character + assert_tokenizer_output("a", {{"a", 1}}); + + // Test single Chinese character + assert_tokenizer_output("中", {{"中", 1}}); +} + +TEST_F(ICUTokenizerFactoryTest, LongText) { + // Test with longer text + std::string long_text = + "This is a long text with multiple words and 中文 characters and 日本語 text"; + std::vector expected = { + {"This", 1}, {"is", 1}, {"a", 1}, {"long", 1}, {"text", 1}, + {"with", 1}, {"multiple", 1}, {"words", 1}, 
{"and", 1}, {"中文", 1}, + {"characters", 1}, {"and", 1}, {"日本語", 1}, {"text", 1}}; + assert_tokenizer_output(long_text, expected); +} + +TEST_F(ICUTokenizerFactoryTest, SpecialCharacters) { + // Test special characters and symbols + assert_tokenizer_output("Price: $100.50 (USD)", {{"Price", 1}, {"100.50", 1}, {"USD", 1}}); +} + +TEST_F(ICUTokenizerFactoryTest, URLAndEmail) { + // Test URL and email handling + assert_tokenizer_output("Visit https://example.com or email test@example.com", + {{"Visit", 1}, + {"https", 1}, + {"example.com", 1}, + {"or", 1}, + {"email", 1}, + {"test", 1}, + {"example.com", 1}}); +} + +TEST_F(ICUTokenizerFactoryTest, CaseSensitivity) { + // Test case sensitivity (ICU tokenizer should preserve case by default) + assert_tokenizer_output("Hello WORLD Test", {{"Hello", 1}, {"WORLD", 1}, {"Test", 1}}); +} + +TEST_F(ICUTokenizerFactoryTest, UnicodeNormalization) { + // Test Unicode normalization + assert_tokenizer_output("café naïve résumé", {{"café", 1}, {"naïve", 1}, {"résumé", 1}}); +} + +} // namespace doris::segment_v2::inverted_index \ No newline at end of file diff --git a/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/keyword_analyzer_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/keyword_analyzer_test.cpp index 1a8283fd1ac49b..0b334b1492d51f 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/keyword_analyzer_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/keyword_analyzer_test.cpp @@ -32,9 +32,9 @@ std::vector tokenize(KeywordTokenizerFactory& factory, const std::s std::vector tokens; auto tokenizer = factory.create(); { - lucene::util::SStringReader reader; - reader.init(text.data(), text.size(), false); - tokenizer->set_reader(&reader); + ReaderPtr reader = std::make_shared>(); + reader->init(text.data(), text.size(), false); + tokenizer->set_reader(reader); tokenizer->reset(); Token t; diff --git 
a/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/ngram_tokenizer_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/ngram_tokenizer_test.cpp index 9b55aae48aa7c0..2249a45d4d37fd 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/ngram_tokenizer_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/ngram_tokenizer_test.cpp @@ -35,9 +35,9 @@ std::vector tokenize(NGramTokenizerFactory& factory, const std::str std::vector tokens; auto tokenizer = factory.create(); { - lucene::util::SStringReader reader; - reader.init(text.data(), text.size(), false); - tokenizer->set_reader(&reader); + ReaderPtr reader = std::make_shared>(); + reader->init(text.data(), text.size(), false); + tokenizer->set_reader(reader); tokenizer->reset(); Token t; diff --git a/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/standard_tokenizer_factory_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/standard_tokenizer_factory_test.cpp index 5cbc56890961a3..631e95745bbab2 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/standard_tokenizer_factory_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/tokenizer/standard_tokenizer_factory_test.cpp @@ -22,13 +22,13 @@ namespace doris::segment_v2::inverted_index { TokenStreamPtr create_standard_tokenizer(const std::string& text, Settings settings = Settings()) { - static lucene::util::SStringReader reader; - reader.init(text.data(), text.size(), false); + ReaderPtr reader = std::make_shared>(); + reader->init(text.data(), text.size(), false); StandardTokenizerFactory factory; factory.initialize(settings); auto tokenizer = factory.create(); - tokenizer->set_reader(&reader); + tokenizer->set_reader(reader); tokenizer->reset(); return tokenizer; } diff --git a/be/test/olap/rowset/segment_v2/inverted_index/util/reader_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/util/reader_test.cpp new file mode 100644 index 
00000000000000..38b17251b8729f --- /dev/null +++ b/be/test/olap/rowset/segment_v2/inverted_index/util/reader_test.cpp @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset/segment_v2/inverted_index/util/reader.h" + +#include + +#include +#include + +#include "olap/rowset/segment_v2/inverted_index/analyzer/analyzer.h" +#include "olap/rowset/segment_v2/inverted_index_common.h" +#include "util/slice.h" + +using namespace lucene::analysis; +using namespace doris::segment_v2::inverted_index; + +namespace doris::segment_v2 { + +TEST(ReaderTest, ArrayFieldTokenStreamWorkflow) { + CharFilterMap char_filter_map; + char_filter_map["char_filter_type"] = "char_replace"; + char_filter_map["char_filter_pattern"] = ","; + char_filter_map["char_filter_replacement"] = " "; + + // Create the InvertedIndexCtx correctly + auto inverted_index_ctx = std::make_shared(); + inverted_index_ctx->custom_analyzer = ""; + inverted_index_ctx->parser_type = InvertedIndexParserType::PARSER_STANDARD; + inverted_index_ctx->parser_mode = "standard"; + inverted_index_ctx->support_phrase = "yes"; + inverted_index_ctx->char_filter_map = char_filter_map; + inverted_index_ctx->lower_case = "true"; + inverted_index_ctx->stop_words = ""; 
+ auto analyzer = InvertedIndexAnalyzer::create_analyzer(inverted_index_ctx.get()); + ASSERT_NE(analyzer, nullptr); + + std::string test_data = "hello,world,test"; + Slice slice(test_data); + + std::vector keep_readers; + auto dir = std::make_shared(); + { + lucene::index::IndexWriter indexwriter(dir.get(), analyzer.get(), true); + indexwriter.setRAMBufferSizeMB(512); + indexwriter.setMaxFieldLength(0x7FFFFFFFL); + indexwriter.setMergeFactor(1000000000); + indexwriter.setUseCompoundFile(false); + lucene::document::Document doc; + std::unique_ptr new_field; + for (int i = 0; i < 2; i++) { + int32_t field_config = lucene::document::Field::STORE_NO; + field_config |= lucene::document::Field::INDEX_NONORMS; + field_config |= lucene::document::Field::INDEX_TOKENIZED; + auto* field = _CLNEW lucene::document::Field(L"name", field_config); + new_field.reset(field); + { + ReaderPtr char_string_reader = + InvertedIndexAnalyzer::create_reader(inverted_index_ctx->char_filter_map); + char_string_reader->init(slice.get_data(), cast_set(slice.get_size()), + false); + + auto* ts = analyzer->tokenStream(new_field->name(), char_string_reader); + ASSERT_NE(ts, nullptr); + + new_field->setValue(ts, true); + keep_readers.emplace_back(std::move(char_string_reader)); + } + doc.add(*new_field.release()); + } + indexwriter.addDocument(&doc); + indexwriter.close(); + } + dir->close(); +} + +} // namespace doris::segment_v2 \ No newline at end of file diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 index 1ae0cc0e075ced..fa9f119258c2f3 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 @@ -126,6 +126,7 @@ CATALOGS: 'CATALOGS'; CHAIN: 'CHAIN'; CHAR: 'CHAR' | 'CHARACTER'; CHARSET: 'CHARSET'; +CHAR_FILTER: 'CHAR_FILTER'; CHECK: 'CHECK'; CLEAN: 'CLEAN'; CLUSTER: 'CLUSTER'; diff --git 
a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index 25c0d9a00abcf9..8a485a9c053e7b 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -242,6 +242,8 @@ supportedCreateStatement name=identifier properties=propertyClause? #createIndexTokenizer | CREATE INVERTED INDEX TOKEN_FILTER (IF NOT EXISTS)? name=identifier properties=propertyClause? #createIndexTokenFilter + | CREATE INVERTED INDEX CHAR_FILTER (IF NOT EXISTS)? + name=identifier properties=propertyClause? #createIndexCharFilter ; dictionaryColumnDefs: @@ -325,6 +327,7 @@ supportedDropStatement | DROP INVERTED INDEX ANALYZER (IF EXISTS)? name=identifier #dropIndexAnalyzer | DROP INVERTED INDEX TOKENIZER (IF EXISTS)? name=identifier #dropIndexTokenizer | DROP INVERTED INDEX TOKEN_FILTER (IF EXISTS)? name=identifier #dropIndexTokenFilter + | DROP INVERTED INDEX CHAR_FILTER (IF EXISTS)? name=identifier #dropIndexCharFilter ; supportedShowStatement @@ -463,6 +466,7 @@ supportedLoadStatement | SHOW INVERTED INDEX ANALYZER #showIndexAnalyzer | SHOW INVERTED INDEX TOKENIZER #showIndexTokenizer | SHOW INVERTED INDEX TOKEN_FILTER #showIndexTokenFilter + | SHOW INVERTED INDEX CHAR_FILTER #showIndexCharFilter ; supportedKillStatement diff --git a/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/BasicTokenizerValidator.java b/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/BasicTokenizerValidator.java new file mode 100644 index 00000000000000..ee33cc34ea42fd --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/BasicTokenizerValidator.java @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.indexpolicy; + +import org.apache.doris.common.DdlException; + +import com.google.common.collect.ImmutableSet; + +import java.util.Map; +import java.util.Set; + +public class BasicTokenizerValidator extends BasePolicyValidator { + private static final Set ALLOWED_PROPS = ImmutableSet.of("type", "mode"); + + public BasicTokenizerValidator() { + super(ALLOWED_PROPS); + } + + @Override + protected String getTypeName() { + return "basic tokenizer"; + } + + @Override + protected void validateSpecific(Map props) throws DdlException { + if (props.containsKey("mode")) { + try { + int mode = Integer.parseInt(props.get("mode")); + if (mode < 1 || mode > 2) { + throw new DdlException("Invalid mode for basic tokenizer: " + mode + + ". 
Mode must be 1 (L1: English + numbers + Chinese) " + + "or 2 (L2: L1 + all Unicode characters)"); + } + } catch (NumberFormatException e) { + throw new DdlException("mode must be a positive integer (1 or 2)"); + } + } + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/CharReplaceCharFilterValidator.java b/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/CharReplaceCharFilterValidator.java new file mode 100644 index 00000000000000..2e7fe15b2a2bea --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/CharReplaceCharFilterValidator.java @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.indexpolicy; + +import org.apache.doris.common.DdlException; + +import com.google.common.collect.ImmutableSet; + +import java.util.Map; +import java.util.Set; + +public class CharReplaceCharFilterValidator extends BasePolicyValidator { + private static final Set ALLOWED_PROPS = ImmutableSet.of( + "type", "pattern", "replacement"); + + public CharReplaceCharFilterValidator() { + super(ALLOWED_PROPS); + } + + @Override + protected String getTypeName() { + return "char_replace filter"; + } + + @Override + protected void validateSpecific(Map props) throws DdlException { + if (props.containsKey("pattern")) { + String pattern = props.get("pattern"); + if (pattern != null && !pattern.isEmpty()) { + for (int i = 0; i < pattern.length(); i++) { + if (pattern.charAt(i) > 255) { + throw new DdlException( + "pattern must contain only single-byte characters in [0,255]"); + } + } + } + } + if (props.containsKey("replacement")) { + String replacement = props.get("replacement"); + if (replacement == null || replacement.length() != 1) { + throw new DdlException("replacement must be exactly one byte"); + } + if (replacement.charAt(0) > 255) { + throw new DdlException("replacement must be in [0,255]"); + } + } + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/ICUTokenizerValidator.java b/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/ICUTokenizerValidator.java new file mode 100644 index 00000000000000..cb7254863572b6 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/ICUTokenizerValidator.java @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.indexpolicy; + +import org.apache.doris.common.DdlException; + +import com.google.common.collect.ImmutableSet; + +import java.util.Map; +import java.util.Set; + +public class ICUTokenizerValidator extends BasePolicyValidator { + private static final Set ALLOWED_PROPS = ImmutableSet.of("type"); + + public ICUTokenizerValidator() { + super(ALLOWED_PROPS); + } + + @Override + protected String getTypeName() { + return "icu tokenizer"; + } + + @Override + protected void validateSpecific(Map props) throws DdlException { + // ICU tokenizer doesn't have additional parameters to validate + // It uses default ICU configuration + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/IndexPolicy.java b/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/IndexPolicy.java index 74fa1c7f8a8d01..8fd02a59292165 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/IndexPolicy.java +++ b/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/IndexPolicy.java @@ -56,13 +56,17 @@ public class IndexPolicy implements Writable, GsonPostProcessable { public static final String PROP_ANALYZER = "analyzer"; public static final String PROP_TOKENIZER = "tokenizer"; public static final String PROP_TOKEN_FILTER = "token_filter"; + public static final String PROP_CHAR_FILTER = "char_filter"; public static final Set BUILTIN_TOKENIZERS = ImmutableSet.of( - "ngram", "edge_ngram", "keyword", "standard", "char_group"); + "ngram", "edge_ngram", "keyword", "standard", "char_group", "basic", "icu"); public static final Set 
BUILTIN_TOKEN_FILTERS = ImmutableSet.of( "asciifolding", "word_delimiter", "lowercase"); + public static final Set BUILTIN_CHAR_FILTERS = ImmutableSet.of( + "char_replace"); + private static final Logger LOG = LogManager.getLogger(IndexPolicy.class); @SerializedName(value = "id") diff --git a/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/IndexPolicyMgr.java b/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/IndexPolicyMgr.java index b92037e953e1aa..d52351f2488aff 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/IndexPolicyMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/IndexPolicyMgr.java @@ -109,6 +109,9 @@ public void createIndexPolicy(boolean ifNotExists, String policyName, if (IndexPolicy.BUILTIN_TOKEN_FILTERS.contains(policyName)) { throw new DdlException("Policy name '" + policyName + "' conflicts with built-in token filter name"); } + if (IndexPolicy.BUILTIN_CHAR_FILTERS.contains(policyName)) { + throw new DdlException("Policy name '" + policyName + "' conflicts with built-in char filter name"); + } IndexPolicy indexPolicy = IndexPolicy.create(policyName, type, properties); @@ -161,6 +164,9 @@ private void validatePolicyProperties(IndexPolicyTypeEnum type, Map properties) throws DdlException { for (String key : properties.keySet()) { if (!key.equals(IndexPolicy.PROP_TOKENIZER) - && !key.equals(IndexPolicy.PROP_TOKEN_FILTER)) { + && !key.equals(IndexPolicy.PROP_TOKEN_FILTER) + && !key.equals(IndexPolicy.PROP_CHAR_FILTER)) { throw new DdlException("Invalid analyzer property: '" + key + "'. 
Only '" + IndexPolicy.PROP_TOKENIZER + "' and '" + IndexPolicy.PROP_TOKEN_FILTER - + "' are allowed."); + + "' and '" + IndexPolicy.PROP_CHAR_FILTER + "' are allowed."); } } @@ -188,6 +195,13 @@ private void validateAnalyzerProperties(Map properties) throws D validatePolicyReference(filter, IndexPolicyTypeEnum.TOKEN_FILTER); } } + + String charFilters = properties.get(IndexPolicy.PROP_CHAR_FILTER); + if (charFilters != null && !charFilters.isEmpty()) { + for (String filter : charFilters.split(",\\s*")) { + validatePolicyReference(filter, IndexPolicyTypeEnum.CHAR_FILTER); + } + } } private void validatePolicyReference(String name, IndexPolicyTypeEnum expectedType) @@ -200,6 +214,10 @@ private void validatePolicyReference(String name, IndexPolicyTypeEnum expectedTy && IndexPolicy.BUILTIN_TOKEN_FILTERS.contains(name)) { return; } + if (expectedType == IndexPolicyTypeEnum.CHAR_FILTER + && IndexPolicy.BUILTIN_CHAR_FILTERS.contains(name)) { + return; + } IndexPolicy policy = getPolicyByName(name); if (policy == null) { @@ -263,6 +281,23 @@ private void validateTokenFilterProperties(Map properties) throw validator.validate(properties); } + private void validateCharFilterProperties(Map properties) throws DdlException { + String type = properties.get(IndexPolicy.PROP_TYPE); + if (type == null || type.isEmpty()) { + throw new DdlException("CHAR_FILTER must specify a 'type' property"); + } + PolicyPropertyValidator validator; + switch (type) { + case "char_replace": + validator = new CharReplaceCharFilterValidator(); + break; + default: + throw new DdlException("Unsupported char filter type: " + type + + ". 
Supported types: " + IndexPolicy.BUILTIN_CHAR_FILTERS); + } + validator.validate(properties); + } + public void dropIndexPolicy(boolean isIfExists, String indexPolicyName, IndexPolicyTypeEnum type) throws DdlException, AnalysisException { writeLock(); @@ -278,7 +313,8 @@ public void dropIndexPolicy(boolean isIfExists, String indexPolicyName, checkAnalyzerNotUsedByIndex(policyToDrop.getName()); } if (policyToDrop.getType() == IndexPolicyTypeEnum.TOKENIZER - || policyToDrop.getType() == IndexPolicyTypeEnum.TOKEN_FILTER) { + || policyToDrop.getType() == IndexPolicyTypeEnum.TOKEN_FILTER + || policyToDrop.getType() == IndexPolicyTypeEnum.CHAR_FILTER) { checkPolicyNotReferenced(policyToDrop); } long id = policyToDrop.getId(); @@ -303,7 +339,8 @@ private void checkAnalyzerNotUsedByIndex(String analyzerName) throws DdlExceptio if (properties != null && analyzerName.equals(properties.get(IndexPolicy.PROP_ANALYZER))) { throw new DdlException("the analyzer " + analyzerName + " is used by index: " - + index.getIndexName() + " in table: " + table.getName()); + + index.getIndexName() + " in table: " + + db.getFullName() + "." 
+ table.getName()); } } } @@ -335,6 +372,17 @@ private void checkPolicyNotReferenced(IndexPolicy policy) throws DdlException { } } } + } else if (policyType == IndexPolicyTypeEnum.CHAR_FILTER) { + String charFilters = properties.get(IndexPolicy.PROP_CHAR_FILTER); + if (charFilters != null && !charFilters.isEmpty()) { + for (String filter : charFilters.split(",\\s*")) { + if (policyName.equals(filter)) { + throw new DdlException("Cannot drop " + policyType + " policy '" + + policyName + "' as it is referenced by ANALYZER policy '" + + analyzerPolicy.getName() + "'"); + } + } + } } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/IndexPolicyTypeEnum.java b/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/IndexPolicyTypeEnum.java index 2f146e16884ff3..acda67c9b8c0a9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/IndexPolicyTypeEnum.java +++ b/fe/fe-core/src/main/java/org/apache/doris/indexpolicy/IndexPolicyTypeEnum.java @@ -23,13 +23,14 @@ * Index policy type enum. 
**/ public enum IndexPolicyTypeEnum { - ANALYZER, TOKENIZER, TOKEN_FILTER; + ANALYZER, TOKENIZER, TOKEN_FILTER, CHAR_FILTER; public TIndexPolicyType toThrift() { switch (this) { case ANALYZER: return TIndexPolicyType.ANALYZER; case TOKENIZER: return TIndexPolicyType.TOKENIZER; case TOKEN_FILTER: return TIndexPolicyType.TOKEN_FILTER; + case CHAR_FILTER: return TIndexPolicyType.CHAR_FILTER; default: throw new IllegalStateException("Unknown type: " + this); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index 925d1e57c9b869..13e777867150ed 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -151,6 +151,7 @@ import org.apache.doris.nereids.DorisParser.CreateEncryptkeyContext; import org.apache.doris.nereids.DorisParser.CreateFileContext; import org.apache.doris.nereids.DorisParser.CreateIndexAnalyzerContext; +import org.apache.doris.nereids.DorisParser.CreateIndexCharFilterContext; import org.apache.doris.nereids.DorisParser.CreateIndexContext; import org.apache.doris.nereids.DorisParser.CreateIndexTokenFilterContext; import org.apache.doris.nereids.DorisParser.CreateIndexTokenizerContext; @@ -189,6 +190,7 @@ import org.apache.doris.nereids.DorisParser.DropFileContext; import org.apache.doris.nereids.DorisParser.DropFunctionContext; import org.apache.doris.nereids.DorisParser.DropIndexAnalyzerContext; +import org.apache.doris.nereids.DorisParser.DropIndexCharFilterContext; import org.apache.doris.nereids.DorisParser.DropIndexClauseContext; import org.apache.doris.nereids.DorisParser.DropIndexContext; import org.apache.doris.nereids.DorisParser.DropIndexTokenFilterContext; @@ -383,6 +385,7 @@ import org.apache.doris.nereids.DorisParser.ShowGrantsContext; import 
org.apache.doris.nereids.DorisParser.ShowGrantsForUserContext; import org.apache.doris.nereids.DorisParser.ShowIndexAnalyzerContext; +import org.apache.doris.nereids.DorisParser.ShowIndexCharFilterContext; import org.apache.doris.nereids.DorisParser.ShowIndexTokenFilterContext; import org.apache.doris.nereids.DorisParser.ShowIndexTokenizerContext; import org.apache.doris.nereids.DorisParser.ShowLastInsertContext; @@ -664,6 +667,7 @@ import org.apache.doris.nereids.trees.plans.commands.CreateFileCommand; import org.apache.doris.nereids.trees.plans.commands.CreateFunctionCommand; import org.apache.doris.nereids.trees.plans.commands.CreateIndexAnalyzerCommand; +import org.apache.doris.nereids.trees.plans.commands.CreateIndexCharFilterCommand; import org.apache.doris.nereids.trees.plans.commands.CreateIndexTokenFilterCommand; import org.apache.doris.nereids.trees.plans.commands.CreateIndexTokenizerCommand; import org.apache.doris.nereids.trees.plans.commands.CreateJobCommand; @@ -699,6 +703,7 @@ import org.apache.doris.nereids.trees.plans.commands.DropFileCommand; import org.apache.doris.nereids.trees.plans.commands.DropFunctionCommand; import org.apache.doris.nereids.trees.plans.commands.DropIndexAnalyzerCommand; +import org.apache.doris.nereids.trees.plans.commands.DropIndexCharFilterCommand; import org.apache.doris.nereids.trees.plans.commands.DropIndexTokenFilterCommand; import org.apache.doris.nereids.trees.plans.commands.DropIndexTokenizerCommand; import org.apache.doris.nereids.trees.plans.commands.DropJobCommand; @@ -798,6 +803,7 @@ import org.apache.doris.nereids.trees.plans.commands.ShowFunctionsCommand; import org.apache.doris.nereids.trees.plans.commands.ShowGrantsCommand; import org.apache.doris.nereids.trees.plans.commands.ShowIndexAnalyzerCommand; +import org.apache.doris.nereids.trees.plans.commands.ShowIndexCharFilterCommand; import org.apache.doris.nereids.trees.plans.commands.ShowIndexCommand; import 
org.apache.doris.nereids.trees.plans.commands.ShowIndexStatsCommand; import org.apache.doris.nereids.trees.plans.commands.ShowIndexTokenFilterCommand; @@ -9106,6 +9112,15 @@ public LogicalPlan visitCreateIndexTokenFilter(CreateIndexTokenFilterContext ctx return new CreateIndexTokenFilterCommand(ifNotExists, policyName, properties); } + @Override + public LogicalPlan visitCreateIndexCharFilter(CreateIndexCharFilterContext ctx) { + boolean ifNotExists = ctx.IF() != null; + String policyName = ctx.name.getText(); + Map properties = visitPropertyClause(ctx.properties); + + return new CreateIndexCharFilterCommand(ifNotExists, policyName, properties); + } + @Override public LogicalPlan visitDropIndexAnalyzer(DropIndexAnalyzerContext ctx) { String policyName = ctx.name.getText(); @@ -9130,6 +9145,14 @@ public LogicalPlan visitDropIndexTokenFilter(DropIndexTokenFilterContext ctx) { return new DropIndexTokenFilterCommand(policyName, ifExists); } + @Override + public LogicalPlan visitDropIndexCharFilter(DropIndexCharFilterContext ctx) { + String policyName = ctx.name.getText(); + boolean ifExists = ctx.IF() != null; + + return new DropIndexCharFilterCommand(policyName, ifExists); + } + @Override public LogicalPlan visitShowIndexAnalyzer(ShowIndexAnalyzerContext ctx) { return new ShowIndexAnalyzerCommand(); @@ -9145,6 +9168,11 @@ public LogicalPlan visitShowIndexTokenFilter(ShowIndexTokenFilterContext ctx) { return new ShowIndexTokenFilterCommand(); } + @Override + public LogicalPlan visitShowIndexCharFilter(ShowIndexCharFilterContext ctx) { + return new ShowIndexCharFilterCommand(); + } + @Override public AlterTableOp visitCreateOrReplaceBranchClauses(DorisParser.CreateOrReplaceBranchClausesContext ctx) { return visitCreateOrReplaceBranchClause(ctx.createOrReplaceBranchClause()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java index 
f5ace7cd416c21..4274e8ca5b940c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java @@ -456,5 +456,8 @@ public enum PlanType { SHOW_INDEX_TOKENIZER_COMMAND, SHOW_INDEX_TOKEN_FILTER_COMMAND, DROP_MATERIALIZED_VIEW_COMMAND, + CREATE_INDEX_CHAR_FILTER_COMMAND, + DROP_INDEX_CHAR_FILTER_COMMAND, + SHOW_INDEX_CHAR_FILTER_COMMAND, EMPTY_COMMAND } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateIndexCharFilterCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateIndexCharFilterCommand.java new file mode 100644 index 00000000000000..261046d9659ab2 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateIndexCharFilterCommand.java @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.trees.plans.commands; + +import org.apache.doris.catalog.Env; +import org.apache.doris.common.ErrorCode; +import org.apache.doris.common.ErrorReport; +import org.apache.doris.common.FeNameFormat; +import org.apache.doris.common.UserException; +import org.apache.doris.indexpolicy.IndexPolicyTypeEnum; +import org.apache.doris.mysql.privilege.PrivPredicate; +import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.qe.StmtExecutor; + +import java.util.Map; + +/** + * CREATE INVERTED INDEX CHAR_FILTER [IF NOT EXISTS] policy_name PROPERTIES (key1 = value1, ...) + */ +public class CreateIndexCharFilterCommand extends Command implements ForwardWithSync { + + private final boolean ifNotExists; + private final String policyName; + private final Map properties; + + /** + * Constructor + */ + public CreateIndexCharFilterCommand(boolean ifNotExists, String policyName, Map properties) { + super(PlanType.CREATE_INDEX_CHAR_FILTER_COMMAND); + this.ifNotExists = ifNotExists; + this.policyName = policyName; + this.properties = properties; + } + + private void validate(ConnectContext ctx) throws UserException { + // check auth + if (!Env.getCurrentEnv().getAccessManager().checkGlobalPriv(ConnectContext.get(), PrivPredicate.ADMIN)) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_SPECIFIC_ACCESS_DENIED_ERROR, "ADMIN"); + } + + // check name + FeNameFormat.checkIndexPolicyName(policyName); + } + + @Override + public void run(ConnectContext ctx, StmtExecutor executor) throws UserException { + validate(ctx); + + Env.getCurrentEnv().getIndexPolicyMgr().createIndexPolicy(ifNotExists, policyName, + IndexPolicyTypeEnum.CHAR_FILTER, properties); + } + + @Override + public R accept(PlanVisitor visitor, C context) { + return visitor.visitCreateIndexCharFilterCommand(this, context); + } +} diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DropIndexCharFilterCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DropIndexCharFilterCommand.java new file mode 100644 index 00000000000000..62874f54b8c0c7 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DropIndexCharFilterCommand.java @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.trees.plans.commands; + +import org.apache.doris.catalog.Env; +import org.apache.doris.common.ErrorCode; +import org.apache.doris.common.ErrorReport; +import org.apache.doris.common.FeNameFormat; +import org.apache.doris.indexpolicy.IndexPolicyTypeEnum; +import org.apache.doris.mysql.privilege.PrivPredicate; +import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.qe.StmtExecutor; + +/** + * DROP INVERTED INDEX CHAR_FILTER [IF EXISTS] policy_name + **/ +public class DropIndexCharFilterCommand extends DropCommand { + private final boolean ifExists; + private final String name; + + public DropIndexCharFilterCommand(String name, boolean ifExists) { + super(PlanType.DROP_INDEX_CHAR_FILTER_COMMAND); + this.name = name; + this.ifExists = ifExists; + } + + @Override + public void doRun(ConnectContext ctx, StmtExecutor executor) throws Exception { + // check auth + if (!Env.getCurrentEnv().getAccessManager().checkGlobalPriv(ConnectContext.get(), PrivPredicate.ADMIN)) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_SPECIFIC_ACCESS_DENIED_ERROR, "ADMIN"); + } + + // check name + FeNameFormat.checkIndexPolicyName(name); + + Env.getCurrentEnv().getIndexPolicyMgr().dropIndexPolicy(ifExists, name, + IndexPolicyTypeEnum.CHAR_FILTER); + } + + @Override + public <R, C> R accept(PlanVisitor<R, C> visitor, C context) { + return visitor.visitDropIndexCharFilterCommand(this, context); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowIndexCharFilterCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowIndexCharFilterCommand.java new file mode 100644 index 00000000000000..8ebb0adfa50b36 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowIndexCharFilterCommand.java @@ -0,0 +1,60 @@ +// Licensed to the 
Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.plans.commands; + +import org.apache.doris.catalog.Env; +import org.apache.doris.common.ErrorCode; +import org.apache.doris.common.ErrorReport; +import org.apache.doris.indexpolicy.IndexPolicy; +import org.apache.doris.indexpolicy.IndexPolicyTypeEnum; +import org.apache.doris.mysql.privilege.PrivPredicate; +import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.qe.ShowResultSet; +import org.apache.doris.qe.ShowResultSetMetaData; +import org.apache.doris.qe.StmtExecutor; + +/** + * SHOW INVERTED INDEX CHAR_FILTER; + **/ +public class ShowIndexCharFilterCommand extends ShowCommand { + public ShowIndexCharFilterCommand() { + super(PlanType.SHOW_INDEX_CHAR_FILTER_COMMAND); + } + + @Override + public ShowResultSet doRun(ConnectContext ctx, StmtExecutor executor) throws Exception { + // check auth + if (!Env.getCurrentEnv().getAccessManager().checkGlobalPriv(ConnectContext.get(), PrivPredicate.ADMIN)) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_SPECIFIC_ACCESS_DENIED_ERROR, "ADMIN"); + } + + return 
Env.getCurrentEnv().getIndexPolicyMgr().showIndexPolicy(IndexPolicyTypeEnum.CHAR_FILTER); + } + + @Override + public <R, C> R accept(PlanVisitor<R, C> visitor, C context) { + return visitor.visitShowIndexCharFilterCommand(this, context); + } + + @Override + public ShowResultSetMetaData getMetaData() { + return IndexPolicy.INDEX_POLICY_META_DATA; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java index 5f679090ffe770..63072f34efeff5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java @@ -78,6 +78,7 @@ import org.apache.doris.nereids.trees.plans.commands.CreateFileCommand; import org.apache.doris.nereids.trees.plans.commands.CreateFunctionCommand; import org.apache.doris.nereids.trees.plans.commands.CreateIndexAnalyzerCommand; +import org.apache.doris.nereids.trees.plans.commands.CreateIndexCharFilterCommand; import org.apache.doris.nereids.trees.plans.commands.CreateIndexTokenFilterCommand; import org.apache.doris.nereids.trees.plans.commands.CreateIndexTokenizerCommand; import org.apache.doris.nereids.trees.plans.commands.CreateJobCommand; @@ -112,6 +113,7 @@ import org.apache.doris.nereids.trees.plans.commands.DropFileCommand; import org.apache.doris.nereids.trees.plans.commands.DropFunctionCommand; import org.apache.doris.nereids.trees.plans.commands.DropIndexAnalyzerCommand; +import org.apache.doris.nereids.trees.plans.commands.DropIndexCharFilterCommand; import org.apache.doris.nereids.trees.plans.commands.DropIndexTokenFilterCommand; import org.apache.doris.nereids.trees.plans.commands.DropIndexTokenizerCommand; import org.apache.doris.nereids.trees.plans.commands.DropJobCommand; @@ -211,6 +213,7 @@ import org.apache.doris.nereids.trees.plans.commands.ShowFunctionsCommand; import 
org.apache.doris.nereids.trees.plans.commands.ShowGrantsCommand; import org.apache.doris.nereids.trees.plans.commands.ShowIndexAnalyzerCommand; +import org.apache.doris.nereids.trees.plans.commands.ShowIndexCharFilterCommand; import org.apache.doris.nereids.trees.plans.commands.ShowIndexCommand; import org.apache.doris.nereids.trees.plans.commands.ShowIndexStatsCommand; import org.apache.doris.nereids.trees.plans.commands.ShowIndexTokenFilterCommand; @@ -1410,6 +1413,11 @@ default R visitCreateIndexAnalyzerCommand( return visitCommand(createIndexAnalyzerCommand, context); } + default R visitCreateIndexCharFilterCommand( + CreateIndexCharFilterCommand createIndexCharFilterCommand, C context) { + return visitCommand(createIndexCharFilterCommand, context); + } + default R visitCreateIndexTokenizerCommand( CreateIndexTokenizerCommand createIndexTokenizerCommand, C context) { return visitCommand(createIndexTokenizerCommand, context); @@ -1425,6 +1433,11 @@ default R visitDropIndexAnalyzerCommand( return visitCommand(dropIndexAnalyzerCommand, context); } + default R visitDropIndexCharFilterCommand( + DropIndexCharFilterCommand dropIndexCharFilterCommand, C context) { + return visitCommand(dropIndexCharFilterCommand, context); + } + default R visitShowCreateStorageVaultCommand(ShowCreateStorageVaultCommand command, C context) { return visitCommand(command, context); } @@ -1444,6 +1457,11 @@ default R visitShowIndexAnalyzerCommand( return visitCommand(showIndexAnalyzerCommand, context); } + default R visitShowIndexCharFilterCommand( + ShowIndexCharFilterCommand showIndexCharFilterCommand, C context) { + return visitCommand(showIndexCharFilterCommand, context); + } + default R visitShowIndexTokenizerCommand( ShowIndexTokenizerCommand showIndexTokenizerCommand, C context) { return visitCommand(showIndexTokenizerCommand, context); diff --git a/gensrc/thrift/AgentService.thrift b/gensrc/thrift/AgentService.thrift index a23bf8e81faa3c..bed44748a66312 100644 --- 
a/gensrc/thrift/AgentService.thrift +++ b/gensrc/thrift/AgentService.thrift @@ -138,7 +138,8 @@ struct TPushStoragePolicyReq { enum TIndexPolicyType { ANALYZER, TOKENIZER, - TOKEN_FILTER + TOKEN_FILTER, + CHAR_FILTER } struct TIndexPolicy { diff --git a/regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer.out b/regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer.out index 4c22ab33fd210b..687807afbfdc8b 100644 --- a/regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer.out +++ b/regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer.out @@ -29,6 +29,12 @@ -- !tokenize_sql -- [{\n "token": "1080º"\n }, {\n "token": "avalanche"\n }] +-- !tokenize_sql -- +[{\n "token": "get"\n }, {\n "token": "images"\n }, {\n "token": "hm"\n }, {\n "token": "bg"\n }, {\n "token": "jpg"\n }, {\n "token": "http"\n }, {\n "token": "1"\n }, {\n "token": "0"\n }] + +-- !tokenize_sql -- +[{\n "token": "让"\n }, {\n "token": "我们"\n }, {\n "token": "说"\n }, {\n "token": "hello"\n }, {\n "token": "そして"\n }, {\n "token": "世界"\n }, {\n "token": "と"\n }, {\n "token": "つ"\n }, {\n "token": "な"\n }, {\n "token": "が"\n }, {\n "token": "ろう"\n }] + -- !sql -- 1 abcDEF diff --git a/regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer1.out b/regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer1.out index 124a6e37ed1b34..d976dd5b18644f 100644 --- a/regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer1.out +++ b/regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer1.out @@ -6,3 +6,9 @@ 1 A two-hour programme which included many forms of [[jazz]] from classic to Latin as well as a mix of jazz from the younger players of the day. 2 with off-peak shows introducing more commercial breaks into their output, before the concept was dropped altogether in mid-2006. 
+-- !sql -- +1 GET /images/hm_bg.jpg HTTP/1.0 + +-- !sql -- +1 GET /images/hm_bg.jpg HTTP/1.0 + diff --git a/regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer.groovy b/regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer.groovy index 9c2315b27eb326..14ee4c6819c143 100644 --- a/regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer.groovy +++ b/regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer.groovy @@ -63,8 +63,26 @@ suite("test_custom_analyzer", "p0") { CREATE INVERTED INDEX ANALYZER IF NOT EXISTS keyword_lowercase PROPERTIES ( - "tokenizer" = "keyword", - "token_filter" = "asciifolding, lowercase" + "tokenizer" = "keyword", + "token_filter" = "asciifolding, lowercase" + ); + """ + + sql """ + CREATE INVERTED INDEX ANALYZER IF NOT EXISTS basic_analyzer + PROPERTIES + ( + "tokenizer" = "basic", + "token_filter" = "lowercase" + ); + """ + + sql """ + CREATE INVERTED INDEX ANALYZER IF NOT EXISTS icu_analyzer + PROPERTIES + ( + "tokenizer" = "icu", + "token_filter" = "lowercase" ); """ @@ -80,6 +98,8 @@ suite("test_custom_analyzer", "p0") { qt_tokenize_sql """ select tokenize("β-carbon nitride", '"analyzer"="lowercase_delimited"'); """ qt_tokenize_sql """ select tokenize("ǁŨǁe language", '"analyzer"="lowercase_delimited"'); """ qt_tokenize_sql """ select tokenize("1080º Avalanche", '"analyzer"="lowercase_delimited"'); """ + qt_tokenize_sql """ select tokenize("GET /images/hm_bg.jpg HTTP/1.0", '"analyzer"="basic_analyzer"'); """ + qt_tokenize_sql """ select tokenize("让我们说「Hello」そして世界とつながろう!", '"analyzer"="icu_analyzer"'); """ sql "DROP TABLE IF EXISTS ${indexTbName1}" sql """ @@ -139,8 +159,6 @@ suite("test_custom_analyzer", "p0") { } } - - try { sql "DROP TABLE IF EXISTS test_custom_analyzer_3" sql """ diff --git a/regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer1.groovy b/regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer1.groovy index 
1bede117f4d9c5..665cda3b34660f 100644 --- a/regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer1.groovy +++ b/regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer1.groovy @@ -18,8 +18,6 @@ import java.sql.SQLException suite("test_custom_analyzer1", "p0") { - def indexTbName1 = "test_custom_analyzer1" - sql """ CREATE INVERTED INDEX TOKEN_FILTER IF NOT EXISTS word_splitter_all PROPERTIES @@ -34,22 +32,64 @@ suite("test_custom_analyzer1", "p0") { """ sql """ - CREATE INVERTED INDEX ANALYZER IF NOT EXISTS custom_standard_analyzer + CREATE INVERTED INDEX ANALYZER IF NOT EXISTS custom_standard_analyzer1 + PROPERTIES + ( + "tokenizer" = "standard", + "token_filter" = "asciifolding, word_splitter_all, lowercase" + ); + """ + + sql """ + CREATE INVERTED INDEX CHAR_FILTER IF NOT EXISTS char_replace_char_filter1 + PROPERTIES + ( + "type" = "char_replace", + "pattern" = "_" + ); + """ + + sql """ + CREATE INVERTED INDEX CHAR_FILTER IF NOT EXISTS char_replace_char_filter2 + PROPERTIES + ( + "type" = "char_replace", + "pattern" = "." 
+ ); + """ + + sql """ + CREATE INVERTED INDEX ANALYZER IF NOT EXISTS custom_standard_analyzer2 PROPERTIES ( - "tokenizer" = "standard", - "token_filter" = "asciifolding, word_splitter_all, lowercase" + "tokenizer" = "standard", + "char_filter" = "char_replace_char_filter1, char_replace_char_filter2", + "token_filter" = "lowercase" ); """ sql """ select sleep(10) """ - sql "DROP TABLE IF EXISTS ${indexTbName1}" + sql "DROP TABLE IF EXISTS test_custom_analyzer1" + sql """ + CREATE TABLE test_custom_analyzer1 ( + `a` bigint NOT NULL AUTO_INCREMENT(1), + `ch` text NULL, + INDEX idx_ch (`ch`) USING INVERTED PROPERTIES("support_phrase" = "true", "analyzer" = "custom_standard_analyzer1") + ) ENGINE=OLAP + DUPLICATE KEY(`a`) + DISTRIBUTED BY RANDOM BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql "DROP TABLE IF EXISTS test_custom_analyzer2" sql """ - CREATE TABLE ${indexTbName1} ( + CREATE TABLE test_custom_analyzer2 ( `a` bigint NOT NULL AUTO_INCREMENT(1), `ch` text NULL, - INDEX idx_ch (`ch`) USING INVERTED PROPERTIES("support_phrase" = "true", "analyzer" = "custom_standard_analyzer") + INDEX idx_ch (`ch`) USING INVERTED PROPERTIES("support_phrase" = "true", "analyzer" = "custom_standard_analyzer2") ) ENGINE=OLAP DUPLICATE KEY(`a`) DISTRIBUTED BY RANDOM BUCKETS 1 @@ -58,15 +98,20 @@ suite("test_custom_analyzer1", "p0") { ); """ - sql """ insert into ${indexTbName1} values(1, "A two-hour programme which included many forms of [[jazz]] from classic to Latin as well as a mix of jazz from the younger players of the day."); """ - sql """ insert into ${indexTbName1} values(2, " with off-peak shows introducing more commercial breaks into their output, before the concept was dropped altogether in mid-2006."); """ + sql """ insert into test_custom_analyzer1 values(1, "A two-hour programme which included many forms of [[jazz]] from classic to Latin as well as a mix of jazz from the younger players of the day."); """ + sql """ 
insert into test_custom_analyzer1 values(2, " with off-peak shows introducing more commercial breaks into their output, before the concept was dropped altogether in mid-2006."); """ + + sql """ insert into test_custom_analyzer2 values(1, "GET /images/hm_bg.jpg HTTP/1.0"); """ try { sql "sync" sql """ set enable_common_expr_pushdown = true; """ - qt_sql """ select * from ${indexTbName1} where ch match 'with'; """ - qt_sql """ select * from ${indexTbName1} where ch match 'the'; """ + qt_sql """ select * from test_custom_analyzer1 where ch match 'with'; """ + qt_sql """ select * from test_custom_analyzer1 where ch match 'the'; """ + + qt_sql """ select * from test_custom_analyzer2 where ch match 'hm'; """ + qt_sql """ select * from test_custom_analyzer2 where ch match 'bg'; """ } finally { } } \ No newline at end of file