7
7
//
8
8
#include < queue>
9
9
#include < boost/range/adaptor/reversed.hpp>
10
- #include < rime/dict/prism.h>
11
10
#include < rime/algo/syllabifier.h>
11
+ #include < rime/dict/corrector.h>
12
+ #include < rime/dict/prism.h>
13
+ #include " syllabifier.h"
12
14
13
15
namespace rime {
16
+ using namespace corrector ;
14
17
15
18
using Vertex = pair<size_t , SpellingType>;
16
19
using VertexQueue = std::priority_queue<Vertex,
@@ -35,16 +38,36 @@ int Syllabifier::BuildSyllableGraph(const string &input,
35
38
// record a visit to the vertex
36
39
if (graph->vertices .find (current_pos) == graph->vertices .end ())
37
40
graph->vertices .insert (vertex); // preferred spelling type comes first
38
- else
41
+ else {
42
+ // graph->vertices[current_pos] = std::min(vertex.second, graph->vertices[current_pos]);
39
43
continue ; // discard worse spelling types
44
+ }
40
45
41
46
if (current_pos > farthest)
42
47
farthest = current_pos;
43
48
DLOG (INFO) << " current_pos: " << current_pos;
44
49
45
50
// see where we can go by advancing a syllable
46
51
vector<Prism::Match> matches;
47
- prism.CommonPrefixSearch (input.substr (current_pos), &matches);
52
+ set<SyllableId> match_set;
53
+ auto current_input = input.substr (current_pos);
54
+ prism.CommonPrefixSearch (current_input, &matches);
55
+ for (auto &m : matches) {
56
+ match_set.insert (m.value );
57
+ }
58
+ if (enable_correction_) {
59
+ Corrections corrections;
60
+ corrector_->ToleranceSearch (prism, current_input, &corrections, 5 );
61
+ for (const auto &m : corrections) {
62
+ for (auto accessor = prism.QuerySpelling (m.first ); !accessor.exhausted (); accessor.Next ()) {
63
+ if (accessor.properties ().type == kNormalSpelling ) {
64
+ matches.push_back ({ m.first , m.second .length });
65
+ break ;
66
+ }
67
+ }
68
+ }
69
+ }
70
+
48
71
if (!matches.empty ()) {
49
72
auto & end_vertices (graph->edges [current_pos]);
50
73
for (const auto & m : matches) {
@@ -56,15 +79,15 @@ int Syllabifier::BuildSyllableGraph(const string &input,
56
79
++end_pos;
57
80
DLOG (INFO) << " end_pos: " << end_pos;
58
81
bool matches_input = (current_pos == 0 && end_pos == input.length ());
59
- SpellingMap spellings;
82
+ SpellingMap& spellings (end_vertices[end_pos]) ;
60
83
SpellingType end_vertex_type = kInvalidSpelling ;
61
84
// when spelling algebra is enabled,
62
85
// a spelling evaluates to a set of syllables;
63
86
// otherwise, it resembles exactly the syllable itself.
64
87
SpellingAccessor accessor (prism.QuerySpelling (m.value ));
65
88
while (!accessor.exhausted ()) {
66
89
SyllableId syllable_id = accessor.syllable_id ();
67
- SpellingProperties props = accessor.properties ();
90
+ EdgeProperties props ( accessor.properties () );
68
91
if (strict_spelling_ &&
69
92
matches_input &&
70
93
props.type != kNormalSpelling ) {
@@ -74,20 +97,29 @@ int Syllabifier::BuildSyllableGraph(const string &input,
74
97
props.end_pos = end_pos;
75
98
// add a syllable with properties to the edge's
76
99
// spelling-to-syllable map
77
- spellings.insert ({syllable_id, props});
100
+ if (match_set.find (m.value ) == match_set.end ()) {
101
+ props.is_correction = true ;
102
+ props.credibility = 0.01 ;
103
+ }
104
+ auto it = spellings.find (syllable_id);
105
+ if (it == spellings.end ()) {
106
+ spellings.insert ({syllable_id, props});
107
+ } else {
108
+ it->second .type = std::min (it->second .type , props.type );
109
+ }
78
110
// let end_vertex_type be the best (smaller) type of spelling
79
111
// that ends at the vertex
80
- if (end_vertex_type > props.type ) {
112
+ if (end_vertex_type > props.type && !props. is_correction ) {
81
113
end_vertex_type = props.type ;
82
114
}
83
115
}
84
116
accessor.Next ();
85
117
}
86
118
if (spellings.empty ()) {
87
119
DLOG (INFO) << " not spelt." ;
120
+ end_vertices.erase (end_pos);
88
121
continue ;
89
122
}
90
- end_vertices[end_pos].swap (spellings);
91
123
// find the best common type in a path up to the end vertex
92
124
// eg. pinyin "shurfa" has vertex type kNormalSpelling at position 3,
93
125
// kAbbreviation at position 4 and kAbbreviation at position 6
@@ -121,6 +153,10 @@ int Syllabifier::BuildSyllableGraph(const string &input,
121
153
// when there is a path of more favored type
122
154
SpellingType edge_type = kInvalidSpelling ;
123
155
for (auto k = j->second .begin (); k != j->second .end (); ) {
156
+ if (k->second .is_correction ) {
157
+ ++k;
158
+ continue ; // Don't care correction edges
159
+ }
124
160
if (k->second .type > last_type) {
125
161
j->second .erase (k++);
126
162
}
@@ -245,4 +281,9 @@ void Syllabifier::Transpose(SyllableGraph* graph) {
245
281
}
246
282
}
247
283
284
+ void Syllabifier::EnableCorrection (an<Corrector> corrector) {
285
+ enable_correction_ = true ;
286
+ corrector_ = std::move (corrector);
287
+ }
288
+
248
289
} // namespace rime
0 commit comments