Skip to content

Commit 0258c7f

Browse files
committed
perf(dictionary): refactor DictEntryIterator and do partial sort
1 parent c7cb47e commit 0258c7f

6 files changed

+46
-62
lines changed

src/rime/dict/dictionary.cc

+18-35
Original file line number | Diff line number | Diff line change
@@ -53,43 +53,26 @@ size_t match_extra_code(const table::Code* extra_code, size_t depth,
5353

5454
} // namespace dictionary
5555

56-
DictEntryIterator::DictEntryIterator()
57-
: Base(), table_(NULL), entry_(), entry_count_(0) {
58-
}
59-
60-
DictEntryIterator::DictEntryIterator(const DictEntryIterator& other)
61-
: Base(other), table_(other.table_), entry_(other.entry_),
62-
entry_count_(other.entry_count_) {
63-
}
64-
65-
DictEntryIterator& DictEntryIterator::operator= (DictEntryIterator& other) {
66-
DLOG(INFO) << "swapping iterator contents.";
67-
swap(other);
68-
table_ = other.table_;
69-
entry_ = other.entry_;
70-
entry_count_ = other.entry_count_;
71-
return *this;
72-
}
73-
74-
bool DictEntryIterator::exhausted() const {
75-
return empty();
76-
}
77-
7856
// Appends |chunk| to the list of chunks to iterate over, adds its size to the
// running entry count, and remembers |table| (PrepareEntry() does nothing
// while table_ is null).
void DictEntryIterator::AddChunk(dictionary::Chunk&& chunk, Table* table) {
  // Read the size before the chunk is moved into the vector, so the
  // accounting never inspects a moved-from object.
  const auto added_entries = chunk.size;
  chunks_.push_back(std::move(chunk));
  entry_count_ += added_entries;
  table_ = table;
}
8361

8462
// Partially sorts the not-yet-consumed chunks so that the best match
// (according to dictionary::compare_chunk_by_head_element) ends up at
// chunk_index_. The relative order of the chunks after it is unspecified.
void DictEntryIterator::Sort() {
  if (chunk_index_ >= chunks_.size()) {
    // No remaining chunk: chunk_index_ + 1 would point past chunks_.end(),
    // which is not a valid range for std::partial_sort.
    return;
  }
  const auto current = chunks_.begin() + chunk_index_;
  std::partial_sort(current,
                    current + 1,
                    chunks_.end(),
                    dictionary::compare_chunk_by_head_element);
}
8770

8871
void DictEntryIterator::PrepareEntry() {
89-
if (empty() || !table_) {
72+
if (exhausted() || !table_) {
9073
return;
9174
}
92-
const auto& chunk(front());
75+
const auto& chunk(chunks_[chunk_index_]);
9376
entry_ = New<DictEntry>();
9477
const auto& e(chunk.entries[chunk.cursor]);
9578
DLOG(INFO) << "creating temporary dict entry '"
@@ -105,7 +88,7 @@ void DictEntryIterator::PrepareEntry() {
10588
}
10689

10790
an<DictEntry> DictEntryIterator::Peek() {
108-
while (!entry_ && !empty()) {
91+
while (!entry_ && !exhausted()) {
10992
PrepareEntry();
11093
if (filter_ && !filter_(entry_)) {
11194
Next();
@@ -116,30 +99,30 @@ an<DictEntry> DictEntryIterator::Peek() {
11699

117100
bool DictEntryIterator::Next() {
118101
entry_.reset();
119-
if (empty()) {
102+
if (exhausted()) {
120103
return false;
121104
}
122-
auto& chunk(front());
105+
auto& chunk(chunks_[chunk_index_]);
123106
if (++chunk.cursor >= chunk.size) {
124-
pop_front();
107+
++chunk_index_;
125108
}
126109
else {
127-
// reorder chunks since front() has got a new head element
110+
// reorder chunks since the current chunk has got a new head element
128111
Sort();
129112
}
130-
return !empty();
113+
return !exhausted();
131114
}
132115

133116
bool DictEntryIterator::Skip(size_t num_entries) {
134117
while (num_entries > 0) {
135-
if (empty()) return false;
136-
auto& chunk(front());
118+
if (exhausted()) return false;
119+
auto& chunk(chunks_[chunk_index_]);
137120
if (chunk.cursor + num_entries < chunk.size) {
138121
chunk.cursor += num_entries;
139122
return true;
140123
}
141124
num_entries -= (chunk.size - chunk.cursor);
142-
pop_front();
125+
++chunk_index_;
143126
}
144127
return true;
145128
}

src/rime/dict/dictionary.h

+10-11
Original file line number | Diff line number | Diff line change
@@ -39,30 +39,29 @@ bool compare_chunk_by_leading_element(const Chunk& a, const Chunk& b);
3939

4040
} // namespace dictionary
4141

42-
class DictEntryIterator : protected list<dictionary::Chunk>,
43-
public DictEntryFilterBinder {
42+
class DictEntryIterator : public DictEntryFilterBinder {
4443
public:
45-
using Base = list<dictionary::Chunk>;
46-
47-
RIME_API DictEntryIterator();
48-
RIME_API DictEntryIterator(const DictEntryIterator& other);
49-
DictEntryIterator& operator= (DictEntryIterator& other);
44+
DictEntryIterator() = default;
45+
DictEntryIterator(DictEntryIterator&& other) = default;
46+
DictEntryIterator& operator= (DictEntryIterator&& other) = default;
5047

5148
void AddChunk(dictionary::Chunk&& chunk, Table* table);
5249
void Sort();
5350
RIME_API an<DictEntry> Peek();
5451
RIME_API bool Next();
5552
bool Skip(size_t num_entries);
56-
RIME_API bool exhausted() const;
53+
bool exhausted() const { return chunk_index_ == chunks_.size(); }
5754
size_t entry_count() const { return entry_count_; }
5855

5956
protected:
6057
void PrepareEntry();
6158

6259
private:
63-
Table* table_;
64-
an<DictEntry> entry_;
65-
size_t entry_count_;
60+
vector<dictionary::Chunk> chunks_;
61+
size_t chunk_index_ = 0;
62+
Table* table_ = nullptr;
63+
an<DictEntry> entry_ = nullptr;
64+
size_t entry_count_ = 0;
6665
};
6766

6867
struct DictEntryCollector : map<size_t, DictEntryIterator> {

src/rime/gear/reverse_lookup_translator.cc

+6-4
Original file line number | Diff line number | Diff line change
@@ -32,9 +32,10 @@ class ReverseLookupTranslation : public TableTranslation {
3232
const string& input,
3333
size_t start, size_t end,
3434
const string& preedit,
35-
const DictEntryIterator& iter,
35+
DictEntryIterator&& iter,
3636
bool quality)
37-
: TableTranslation(options, NULL, input, start, end, preedit, iter),
37+
: TableTranslation(
38+
options, NULL, input, start, end, preedit, std::move(iter)),
3839
dict_(dict), options_(options), quality_(quality) {
3940
}
4041
virtual an<Candidate> Peek();
@@ -185,7 +186,7 @@ an<Translation> ReverseLookupTranslator::Query(const string& input,
185186
auto collector = dict_->Lookup(graph, 0);
186187
if (collector && !collector->empty() &&
187188
collector->rbegin()->first == consumed) {
188-
iter = collector->rbegin()->second;
189+
iter = std::move(collector->rbegin()->second);
189190
quality = !graph.vertices.empty() &&
190191
(graph.vertices.rbegin()->second == kNormalSpelling);
191192
}
@@ -199,7 +200,8 @@ an<Translation> ReverseLookupTranslator::Query(const string& input,
199200
segment.start,
200201
segment.end,
201202
preedit,
202-
iter, quality);
203+
std::move(iter),
204+
quality);
203205
}
204206
return nullptr;
205207
}

src/rime/gear/table_translator.cc

+7-7
Original file line number | Diff line number | Diff line change
@@ -33,11 +33,11 @@ TableTranslation::TableTranslation(TranslatorOptions* options,
3333
size_t start,
3434
size_t end,
3535
const string& preedit,
36-
const DictEntryIterator& iter,
37-
const UserDictEntryIterator& uter)
36+
DictEntryIterator&& iter,
37+
UserDictEntryIterator&& uter)
3838
: options_(options), language_(language),
3939
input_(input), start_(start), end_(end), preedit_(preedit),
40-
iter_(iter), uter_(uter) {
40+
iter_(std::move(iter)), uter_(std::move(uter)) {
4141
if (options_)
4242
options_->preedit_formatter().Apply(&preedit_);
4343
CheckEmpty();
@@ -190,7 +190,7 @@ bool LazyTableTranslation::FetchMoreTableEntries() {
190190
}
191191
if (more.entry_count() > previous_entry_count) {
192192
more.Skip(previous_entry_count);
193-
iter_ = more;
193+
iter_ = std::move(more);
194194
}
195195
return true;
196196
}
@@ -276,8 +276,8 @@ an<Translation> TableTranslator::Query(const string& input,
276276
segment.start,
277277
segment.start + input.length(),
278278
preedit,
279-
iter,
280-
uter);
279+
std::move(iter),
280+
std::move(uter));
281281
}
282282
if (translation) {
283283
bool filter_by_charset = enable_charset_filter_ &&
@@ -618,7 +618,7 @@ TableTranslator::MakeSentence(const string& input, size_t start,
618618
entries[consumed_length] = iter.Peek();
619619
if (start_pos == 0 && !iter.exhausted()) {
620620
// also provide words for manual composition
621-
collector[consumed_length] = iter;
621+
collector[consumed_length] = std::move(iter);
622622
DLOG(INFO) << "table[" << consumed_length << "]: "
623623
<< collector[consumed_length].entry_count();
624624
}

src/rime/gear/table_translator.h

+2-2
Original file line number | Diff line number | Diff line change
@@ -56,8 +56,8 @@ class TableTranslation : public Translation {
5656
size_t start,
5757
size_t end,
5858
const string& preedit,
59-
const DictEntryIterator& iter = DictEntryIterator(),
60-
const UserDictEntryIterator& uter = UserDictEntryIterator());
59+
DictEntryIterator&& iter = {},
60+
UserDictEntryIterator&& uter = {});
6161

6262
virtual bool Next();
6363
virtual an<Candidate> Peek();

test/dictionary_test.cc

+3-3
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@ TEST_F(RimeDictionaryTest, ScriptLookup) {
7878
ASSERT_TRUE(bool(c));
7979

8080
ASSERT_TRUE(c->find(3) != c->end());
81-
rime::DictEntryIterator d3((*c)[3]);
81+
rime::DictEntryIterator& d3((*c)[3]);
8282
EXPECT_FALSE(d3.exhausted());
8383
auto e1 = d3.Peek();
8484
ASSERT_TRUE(bool(e1));
@@ -87,14 +87,14 @@ TEST_F(RimeDictionaryTest, ScriptLookup) {
8787
EXPECT_TRUE(d3.Next());
8888

8989
ASSERT_TRUE(c->find(5) != c->end());
90-
rime::DictEntryIterator d5((*c)[5]);
90+
rime::DictEntryIterator& d5((*c)[5]);
9191
EXPECT_FALSE(d5.exhausted());
9292
auto e2 = d5.Peek();
9393
ASSERT_TRUE(bool(e2));
9494
EXPECT_EQ(2, e2->code.size());
9595

9696
ASSERT_TRUE(c->find(7) != c->end());
97-
rime::DictEntryIterator d7((*c)[7]);
97+
rime::DictEntryIterator& d7((*c)[7]);
9898
EXPECT_FALSE(d7.exhausted());
9999
auto e3 = d7.Peek();
100100
ASSERT_TRUE(bool(e3));

0 commit comments

Comments
 (0)