Skip to content

Commit

Permalink
Enable ik_max_word for IK analyzer (#2335)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

- Fix corruption during parallel indexing
- Enable non-smart (fine-grained) mode

Issue link: #2305

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
  • Loading branch information
yingfeng authored Dec 6, 2024
1 parent 38765e5 commit 076f58f
Show file tree
Hide file tree
Showing 7 changed files with 26 additions and 11 deletions.
9 changes: 8 additions & 1 deletion src/common/analyzer/ik/analyze_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import ik_dict;
module analyze_context;

namespace infinity {
AnalyzeContext::AnalyzeContext(Dictionary *dict) : dict_(dict) {
AnalyzeContext::AnalyzeContext(Dictionary *dict, bool ik_smart) : dict_(dict), ik_smart_(ik_smart) {
buff_offset_ = 0;
cursor_ = 0;
last_useless_char_num_ = 0;
Expand Down Expand Up @@ -108,6 +108,7 @@ Lexeme *AnalyzeContext::GetNextLexeme() {
break;
} else {
delete result;
result = nullptr;
}
}
return result;
Expand All @@ -125,6 +126,8 @@ void AnalyzeContext::Reset() {
}

void AnalyzeContext::Compound(Lexeme *result) {
if (!ik_smart_)
return;
if (!results_.empty()) {
if (Lexeme::TYPE_ARABIC == result->GetLexemeType()) {
Lexeme *next_lexeme = results_.front();
Expand All @@ -135,7 +138,9 @@ void AnalyzeContext::Compound(Lexeme *result) {
append_ok = result->Append(*next_lexeme, Lexeme::TYPE_CQUAN);
}
if (append_ok) {
Lexeme *r = results_.front();
results_.pop_front();
delete r;
}
}
if (Lexeme::TYPE_CNUM == result->GetLexemeType() && !results_.empty()) {
Expand All @@ -145,7 +150,9 @@ void AnalyzeContext::Compound(Lexeme *result) {
append_ok = result->Append(*next_lexeme, Lexeme::TYPE_CQUAN);
}
if (append_ok) {
Lexeme *r = results_.front();
results_.pop_front();
delete r;
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion src/common/analyzer/ik/analyze_context.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ public:

Dictionary *dict_{nullptr};

AnalyzeContext(Dictionary *dict);
bool ik_smart_{true};

AnalyzeContext(Dictionary *dict, bool is_smart = true);

int GetCursor() const { return cursor_; }

Expand Down
4 changes: 3 additions & 1 deletion src/common/analyzer/ik/dict_segment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ Hit *DictSegment::Match(const Vector<wchar_t> &char_array, int begin, int length
ds = (*it).get();
}
} else if (!children_map_.empty()) {
ds = children_map_[key_char].get();
auto it = children_map_.find(key_char);
if (it != children_map_.end())
ds = it->second.get();
}

if (ds != nullptr) {
Expand Down
2 changes: 0 additions & 2 deletions src/common/analyzer/ik/dict_segment.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@ private:
}
return children_map_;
}

void Migrate(Vector<UniquePtr<DictSegment>> &segment_array, HashMap<wchar_t, UniquePtr<DictSegment>> &segment_map);
};

} // namespace infinity
13 changes: 10 additions & 3 deletions src/common/analyzer/ik/ik_analyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace infinity {

IKAnalyzer::IKAnalyzer(const String &path) : dict_path_(path) {}

IKAnalyzer::IKAnalyzer(const IKAnalyzer &other) : own_dict_(false), fine_grained_(other.fine_grained_), dict_(other.dict_) { Init(); }
IKAnalyzer::IKAnalyzer(const IKAnalyzer &other) : own_dict_(false), ik_smart_(other.ik_smart_), dict_(other.dict_) { Init(); }

IKAnalyzer::~IKAnalyzer() {
if (own_dict_) {
Expand All @@ -30,11 +30,18 @@ IKAnalyzer::~IKAnalyzer() {
}

void IKAnalyzer::Init() {
context_ = MakeUnique<AnalyzeContext>(dict_);
context_ = MakeUnique<AnalyzeContext>(dict_, ik_smart_);
LoadSegmenters();
arbitrator_ = MakeUnique<IKArbitrator>();
}

// Switch the analyzer between fine-grained and smart segmentation.
// Smart mode is the negation of fine-grained; if an AnalyzeContext has
// already been created, the new setting is propagated into it so the
// change takes effect without re-initialization.
void IKAnalyzer::SetFineGrained(bool fine_grained) {
    const bool smart = !fine_grained;
    ik_smart_ = smart;
    if (context_) {
        context_->ik_smart_ = smart;
    }
}

Status IKAnalyzer::Load() {
dict_ = new Dictionary(dict_path_);
Status load_status = dict_->Load();
Expand Down Expand Up @@ -77,7 +84,7 @@ int IKAnalyzer::AnalyzeImpl(const Term &input, void *data, HookType func) {
for (auto &segmenter : segmenters_) {
segmenter->Reset();
}
arbitrator_->Process(context_.get(), true);
arbitrator_->Process(context_.get(), ik_smart_);
context_->OutputToResult();
context_->MarkBufferOffset();
Lexeme *lexeme = nullptr;
Expand Down
4 changes: 2 additions & 2 deletions src/common/analyzer/ik/ik_analyzer.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public:

Status Load();

void SetFineGrained(bool fine_grained) { fine_grained_ = fine_grained; };
void SetFineGrained(bool fine_grained);

protected:
int AnalyzeImpl(const Term &input, void *data, HookType func) override;
Expand All @@ -45,7 +45,7 @@ private:

bool own_dict_{};

bool fine_grained_{false};
bool ik_smart_{true};

Dictionary *dict_{nullptr};

Expand Down
1 change: 0 additions & 1 deletion src/common/analyzer/ik/lexeme.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ Lexeme::Lexeme(int offset, int begin, int length, int lexeme_type) {

// Produce a heap-allocated duplicate of this lexeme; the caller takes
// ownership of the returned raw pointer (this codebase deletes lexemes
// manually, e.g. in AnalyzeContext::GetNextLexeme).
Lexeme *Lexeme::Copy() {
    Lexeme *copy = new Lexeme(offset_, begin_, length_, lexeme_type_);
    // NOTE(review): the surrounding commit shows this text-copy line being
    // deleted — confirm whether lexeme_text_ should still be duplicated here.
    copy->lexeme_text_ = lexeme_text_;
    return copy;
}

Expand Down

0 comments on commit 076f58f

Please sign in to comment.