17
17
#include < rime/dict/dictionary.h>
18
18
#include < rime/dict/user_dictionary.h>
19
19
#include < rime/gear/charset_filter.h>
20
+ #include < rime/gear/grammar.h>
20
21
#include < rime/gear/table_translator.h>
21
22
#include < rime/gear/translator_commons.h>
22
23
#include < rime/gear/unity_table_encoder.h>
@@ -216,6 +217,13 @@ TableTranslator::TableTranslator(const Ticket& ticket)
216
217
&encode_commit_history_);
217
218
config->GetInt (name_space_ + " /max_phrase_length" ,
218
219
&max_phrase_length_);
220
+ config->GetInt (name_space_ + " /max_homographs" ,
221
+ &max_homographs_);
222
+ if (enable_sentence_ || sentence_over_completion_) {
223
+ if (auto * grammar_component = Grammar::Require (" grammar" )) {
224
+ grammar_.reset (grammar_component->Create (config));
225
+ }
226
+ }
219
227
}
220
228
if (enable_encoder_ && user_dict_) {
221
229
encoder_.reset (new UnityTableEncoder (user_dict_.get ()));
@@ -231,7 +239,7 @@ static bool starts_with_completion(an<Translation> translation) {
231
239
}
232
240
233
241
an<Translation> TableTranslator::Query (const string& input,
234
- const Segment& segment) {
242
+ const Segment& segment) {
235
243
if (!segment.HasTag (tag_))
236
244
return nullptr ;
237
245
DLOG (INFO) << " input = '" << input
@@ -519,7 +527,7 @@ bool SentenceTranslation::PreferUserPhrase() const {
519
527
return false ;
520
528
}
521
529
522
- static size_t consume_trailing_delimiters (size_t pos,
530
+ inline static size_t consume_trailing_delimiters (size_t pos,
523
531
const string& input,
524
532
const string& delimiters) {
525
533
while (pos < input.length () &&
@@ -529,11 +537,25 @@ static size_t consume_trailing_delimiters(size_t pos,
529
537
return pos;
530
538
}
531
539
540
+ template <class Iter >
541
+ inline static void collect_entries (DictEntryList& dest,
542
+ Iter& iter,
543
+ int max_entries) {
544
+ if (dest.size () < max_entries && !iter.exhausted ()) {
545
+ dest.push_back (iter.Peek ());
546
+ // alters iter if collecting more than 1 entries
547
+ while (dest.size () < max_entries && iter.Next ()) {
548
+ dest.push_back (iter.Peek ());
549
+ }
550
+ }
551
+ }
552
+
532
553
an<Translation>
533
554
TableTranslator::MakeSentence (const string& input, size_t start,
534
555
bool include_prefix_phrases) {
535
556
bool filter_by_charset = enable_charset_filter_ &&
536
557
!engine_->context ()->get_option (" extended_charset" );
558
+ const int max_entries = max_homographs_;
537
559
DictEntryCollector collector;
538
560
UserDictEntryCollector user_phrase_collector;
539
561
map<int , an<Sentence>> sentences;
@@ -543,13 +565,14 @@ TableTranslator::MakeSentence(const string& input, size_t start,
543
565
continue ;
544
566
string active_input = input.substr (start_pos);
545
567
string active_key = active_input + ' ' ;
546
- vector<of<DictEntry>> entries (active_input. length () + 1 ) ;
568
+ UserDictEntryCollector collected_entries ;
547
569
// lookup dictionaries
548
570
if (user_dict_ && user_dict_->loaded ()) {
549
571
for (size_t len = 1 ; len <= active_input.length (); ++len) {
550
572
size_t consumed_length =
551
573
consume_trailing_delimiters (len, active_input, delimiters_);
552
- if (entries[consumed_length])
574
+ auto & dest (collected_entries[consumed_length]);
575
+ if (dest.size () >= max_entries)
553
576
continue ;
554
577
DLOG (INFO) << " active input: " << active_input << " [0, " << len << " )" ;
555
578
UserDictEntryIterator uter;
@@ -560,9 +583,15 @@ TableTranslator::MakeSentence(const string& input, size_t start,
560
583
uter.AddFilter (CharsetFilter::FilterDictEntry);
561
584
}
562
585
if (!uter.exhausted ()) {
563
- entries[consumed_length] = uter.Peek ();
586
+ if (start_pos == 0 && max_entries > 1 ) {
587
+ UserDictEntryIterator uter_copy (uter);
588
+ collect_entries (dest, uter_copy, max_entries);
589
+ } else {
590
+ collect_entries (dest, uter, max_entries);
591
+ }
564
592
if (start_pos == 0 ) {
565
593
// also provide words for manual composition
594
+ // uter must not be consumed
566
595
uter.Release (&user_phrase_collector[consumed_length]);
567
596
DLOG (INFO) << " user phrase[" << consumed_length << " ]: "
568
597
<< user_phrase_collector[consumed_length].size ();
@@ -578,7 +607,8 @@ TableTranslator::MakeSentence(const string& input, size_t start,
578
607
for (size_t len = 1 ; len <= active_input.length (); ++len) {
579
608
size_t consumed_length =
580
609
consume_trailing_delimiters (len, active_input, delimiters_);
581
- if (entries[consumed_length])
610
+ auto & dest (collected_entries[consumed_length]);
611
+ if (!dest.empty ())
582
612
continue ;
583
613
DLOG (INFO) << " active input: " << active_input << " [0, " << len << " )" ;
584
614
UserDictEntryIterator uter;
@@ -589,9 +619,15 @@ TableTranslator::MakeSentence(const string& input, size_t start,
589
619
uter.AddFilter (CharsetFilter::FilterDictEntry);
590
620
}
591
621
if (!uter.exhausted ()) {
592
- entries[consumed_length] = uter.Peek ();
622
+ if (start_pos == 0 && max_entries > 1 ) {
623
+ UserDictEntryIterator uter_copy (uter);
624
+ collect_entries (dest, uter_copy, max_entries);
625
+ } else {
626
+ collect_entries (dest, uter, max_entries);
627
+ }
593
628
if (start_pos == 0 ) {
594
629
// also provide words for manual composition
630
+ // uter must not be consumed
595
631
uter.Release (&user_phrase_collector[consumed_length]);
596
632
DLOG (INFO) << " unity phrase[" << consumed_length << " ]: "
597
633
<< user_phrase_collector[consumed_length].size ();
@@ -612,17 +648,24 @@ TableTranslator::MakeSentence(const string& input, size_t start,
612
648
continue ;
613
649
size_t consumed_length =
614
650
consume_trailing_delimiters (m.length , active_input, delimiters_);
615
- if (entries[consumed_length])
651
+ auto & dest (collected_entries[consumed_length]);
652
+ if (dest.size () >= max_entries)
616
653
continue ;
617
654
DictEntryIterator iter;
618
655
dict_->LookupWords (&iter, active_input.substr (0 , m.length ), false );
619
656
if (filter_by_charset) {
620
657
iter.AddFilter (CharsetFilter::FilterDictEntry);
621
658
}
622
659
if (!iter.exhausted ()) {
623
- entries[consumed_length] = iter.Peek ();
660
+ if (start_pos == 0 && max_entries - dest.size () > 1 ) {
661
+ DictEntryIterator iter_copy = iter;
662
+ collect_entries (dest, iter_copy, max_entries);
663
+ } else {
664
+ collect_entries (dest, iter, max_entries);
665
+ }
624
666
if (start_pos == 0 ) {
625
667
// also provide words for manual composition
668
+ // iter must not be consumed
626
669
collector[consumed_length] = std::move (iter);
627
670
DLOG (INFO) << " table[" << consumed_length << " ]: "
628
671
<< collector[consumed_length].entry_count ();
@@ -631,16 +674,20 @@ TableTranslator::MakeSentence(const string& input, size_t start,
631
674
}
632
675
}
633
676
for (size_t len = 1 ; len <= active_input.length (); ++len) {
634
- if (!entries[len])
677
+ const auto & entries (collected_entries[len]);
678
+ if (entries.empty ())
635
679
continue ;
636
680
size_t end_pos = start_pos + len;
637
- // create a new sentence
638
- auto new_sentence = New<Sentence>(*sentences[start_pos]);
639
- new_sentence->Extend (*entries[len], end_pos);
640
- // compare and update sentences
641
- if (sentences.find (end_pos) == sentences.end () ||
642
- sentences[end_pos]->weight () <= new_sentence->weight ()) {
643
- sentences[end_pos] = std::move (new_sentence);
681
+ bool is_rear = end_pos == input.length ();
682
+ for (const auto & entry : entries) {
683
+ // create a new sentence
684
+ auto new_sentence = New<Sentence>(*sentences[start_pos]);
685
+ new_sentence->Extend (*entry, end_pos, is_rear, grammar_.get ());
686
+ // compare and update sentences
687
+ if (sentences.find (end_pos) == sentences.end () ||
688
+ sentences[end_pos]->weight () <= new_sentence->weight ()) {
689
+ sentences[end_pos] = std::move (new_sentence);
690
+ }
644
691
}
645
692
}
646
693
}
0 commit comments